reduce compile times
Break the match statement down into smaller functions, one function per
TLD. This brings compile times, which had shot up from about 30 minutes
in the previous release to about 1 hour, back down to about 3 minutes!
rushmorem committed Jul 21, 2018
1 parent 8302292 commit bbe8026
Showing 8 changed files with 123 additions and 119 deletions.
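
To make the shape of the change concrete, here is a hand-written sketch of roughly what the generated lookup code looks like after this commit. It is illustrative only: `Info` is stripped down to just a length, the two TLDs and the `lookup0`/`lookup1` bodies are invented, and the real macro also handles wildcard and exception rules and can match on byte slices as well as `str`. The point is the structure: a small top-level `lookup` that matches the first label and forwards to one `#[inline]` function per TLD, instead of one enormous nested match covering the whole list.

```rust
// Illustrative sketch only; not the crate's actual generated output.

#[derive(Debug, Clone, Copy, Default)]
pub struct Info {
    pub len: usize, // length in bytes of the matched public suffix
}

// Top-level dispatcher: one tiny match arm per TLD.
#[inline]
fn lookup<'a, T>(mut labels: T, mut info: Info) -> Info
where
    T: Iterator<Item = &'a str>,
{
    match labels.next() {
        Some(label) => match label {
            "com" => lookup0(labels, info),
            "uk" => lookup1(labels, info),
            // Unknown TLDs count as a suffix of one label.
            val => {
                info.len = val.len();
                info
            }
        },
        None => info,
    }
}

// Each TLD's rules live in their own small function, so no single
// function body grows with the size of the whole list.
#[inline]
fn lookup0<'a, T>(_labels: T, mut info: Info) -> Info
where
    T: Iterator<Item = &'a str>,
{
    info.len = "com".len();
    info
}

#[inline]
fn lookup1<'a, T>(mut labels: T, mut info: Info) -> Info
where
    T: Iterator<Item = &'a str>,
{
    let mut len = "uk".len();
    info.len = len;
    // Nested rules for this TLD, e.g. "co.uk" (+ 1 for the dot).
    if let Some("co") = labels.next() {
        len += "co".len() + 1;
        info.len = len;
    }
    info
}

fn main() {
    // Labels are fed right-to-left, just like `domain.rsplit('.')`
    // in the macro expansion.
    let info = lookup("example.co.uk".rsplit('.'), Info::default());
    assert_eq!(info.len, "co.uk".len());
    println!("suffix length: {}", info.len);
}
```

Splitting the expansion up this way keeps each generated function small, which is where the compile-time win described in the commit message comes from.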
6 changes: 5 additions & 1 deletion .travis.yml
@@ -1,3 +1,4 @@
sudo: false
language: rust

rust:
@@ -7,4 +8,7 @@ rust:
- nightly

script:
- cd psl && travis_wait 70 cargo test --release
- cargo test --package psl

cache:
cargo: true
15 changes: 5 additions & 10 deletions Cargo.toml
@@ -12,11 +12,6 @@ readme = "README.md"
keywords = ["tld", "gtld", "cctld", "domain", "psl"]
authors = ["rushmorem <[email protected]>"]

[features]
default = ["list", "punycode"]
list = ["psl/list"]
punycode = ["psl/punycode"]

[dependencies]
rental = "0.5"
error-chain = "0.12"
@@ -25,13 +20,13 @@ lazy_static = "1.0"
idna = "0.1"

[dependencies.psl]
version = "0.3"
default-features = false
version = "0.4"
path = "./psl"

[dev-dependencies]
rspec = "=1.0.0-beta.4"
criterion = "0.2"

[package.metadata.docs.rs]
rustc-args = ["--cfg", "addr_docs_rs"]
rustdoc-args = ["--cfg", "addr_docs_rs"]
[[bench]]
name = "list_benchmark"
harness = false
File renamed without changes.
10 changes: 2 additions & 8 deletions codegen/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "psl-codegen"
description = "Generate native Rust code from Mozilla's Public Suffix List"
version = "0.3.3"
version = "0.4.0"
license = "MIT/Apache-2.0"
repository = "https://github.com/addr-rs/addr/tree/master/codegen"
documentation = "https://docs.rs/psl-codegen"
@@ -13,18 +13,12 @@ authors = ["rushmorem <[email protected]>"]
proc-macro = true

[features]
default = ["prefix", "punycode"]
default = ["prefix"]

# Whether or not to prefix the generated types
# from the `psl` crate with the name of the crate
prefix = []

# Add compile time punycode support
# If you don't need punycode support, disable this
# to reduce compile times by almost half for the
# official list.
punycode = []

[dependencies]
proc-macro2 = "0.4"
quote = "0.6"
166 changes: 104 additions & 62 deletions codegen/src/lib.rs
@@ -42,22 +42,20 @@ pub fn derive_psl(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
false
};

let (labels, iter) = if string_match {
let labels = quote! {
let labels = if string_match {
quote! {
match ::core::str::from_utf8(domain) {
Ok(domain) => domain.rsplit('.'),
Err(_) => {
return info;
}
}
};
(labels, quote!(str))
}
} else {
let labels = quote!(domain.rsplit(|x| *x == b'.'));
(labels, quote!([u8]))
quote!(domain.rsplit(|x| *x == b'.'))
};

let body = body(resources, string_match);
let funcs = process(resources, string_match);

let expanded = quote! {
mod __psl_impl {
@@ -76,7 +74,7 @@ pub fn derive_psl(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
false
};

info = lookup(labels, info.len, info);
info = lookup(labels, info);

if fqdn && info.len > 0 {
info.len += 1;
@@ -86,26 +84,17 @@ pub fn derive_psl(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
}
}

#[inline]
fn lookup<'a, T>(mut labels: T, mut len: usize, mut info: Info) -> Info
where T: Iterator<Item=&'a #iter>
{
#body
info
}
#funcs
}
};

expanded.into()
}

#[derive(Debug)]
struct Depth(usize);

#[derive(Debug)]
#[derive(Debug, Clone, Copy)]
struct StringMatch(bool);

fn body(resources: Vec<Uri>, string_match: bool) -> TokenStream {
fn process(resources: Vec<Uri>, string_match: bool) -> TokenStream {
use self::Uri::*;

let mut list = if resources.is_empty() {
@@ -164,42 +153,109 @@ fn body(resources: Vec<Uri>, string_match: bool) -> TokenStream {
.rev()
.collect();
tree.insert(labels.iter(), suffix.typ);
if cfg!(feature = "punycode") {
let labels: Vec<_> = labels.into_iter().map(|label| {
idna::domain_to_ascii(&label)
.expect(&format!("expected: a label that can be converted to ascii, found: {}", label))
})
.collect();
tree.insert(labels.iter(), suffix.typ);
}
let labels: Vec<_> = labels.into_iter().map(|label| {
idna::domain_to_ascii(&label)
.expect(&format!("expected: a label that can be converted to ascii, found: {}", label))
})
.collect();
tree.insert(labels.iter(), suffix.typ);
}
}

build(tree.children_with_keys(), Depth(0), StringMatch(string_match))
funcs(tree.children_with_keys(), StringMatch(string_match))
}

fn build(list: Vec<(&String, &SequenceTrie<String, Type>)>, Depth(depth): Depth, StringMatch(string_match): StringMatch) -> TokenStream {
fn funcs(
list: Vec<(&String, &SequenceTrie<String, Type>)>,
StringMatch(string_match): StringMatch,
) -> TokenStream {
if list.is_empty() {
if depth == 0 {
if !cfg!(test) {
panic!("
Found empty list. This implementation doesn't support empty lists.
If you do want one, you can easily implement the trait `psl::Psl`
by merely putting `None` in the body.
");
}
return TokenStream::new();
}

let mut body = TokenStream::new();
let mut funcs = TokenStream::new();
let iter = if string_match { quote!(str) } else { quote!([u8]) };

for (i, (label, tree)) in list.into_iter().enumerate() {
let fname = syn::parse_str::<syn::Ident>(&format!("lookup{}", i)).unwrap();
let pat = pat(label, StringMatch(string_match));
body.append_all(quote!{
#pat => #fname(labels, info),
});
let children = build(tree.children_with_keys(), StringMatch(string_match));
let (labels, resize_len) = if children.is_empty() {
let resize_len = quote! {
info.len = #pat.len();
};
(quote!(_), resize_len)
} else {
let resize_len = quote! {
let mut len = #pat.len();
info.len = len;
};
(quote!(mut labels), resize_len)
};
funcs.append_all(quote!{
#[inline]
fn #fname<'a, T>(#labels: T, mut info: Info) -> Info
where T: Iterator<Item=&'a #iter>
{
#resize_len
#children
info
}
});
}

let lookup = quote! {
#[inline]
fn lookup<'a, T>(mut labels: T, mut info: Info) -> Info
where T: Iterator<Item=&'a #iter>
{
match labels.next() {
Some(label) => {
match label {
#body
val => {
info.len = val.len();
info
}
}
}
None => info,
}
}
};

quote![#lookup #funcs]
}

fn pat(label: &str, StringMatch(string_match): StringMatch) -> TokenStream {
if string_match {
quote!(#label)
} else {
let pat = array_expr(label);
quote!(#pat)
}
}

fn build(
list: Vec<(&String, &SequenceTrie<String, Type>)>,
string_match: StringMatch,
) -> TokenStream {
let mut head = TokenStream::new();
let mut body = TokenStream::new();
let mut footer = TokenStream::new();

for (label, tree) in list {
let mut info = if depth == 0 {
// invoke the wildcard rule
quote!(info.len = len;)
} else {
TokenStream::new()
};
let mut info = TokenStream::new();
if let Some(val) = tree.value() {
let t = match *val {
Type::Icann => syn::parse_str::<syn::Type>("Icann").unwrap(),
@@ -209,19 +265,10 @@ fn build(list: Vec<(&String, &SequenceTrie<String, Type>)>, Depth(depth): Depth,
info = Info { len, typ: Some(Type::#t) };
};
}
let children = build(tree.children_with_keys(), Depth(depth + 1), StringMatch(string_match));
let pat = |label| {
if string_match {
quote!(#label)
} else {
let pat = array_expr(label);
quote!(#pat)
}
};
let plus_1 = if depth > 0 { quote!(+ 1) } else { TokenStream::new() };
let children = build(tree.children_with_keys(), string_match);
if label.starts_with('!') {
let label = label.trim_left_matches('!');
let pat = pat(label);
let pat = pat(label, string_match);
head.append_all(quote! {
#pat => {
#info
@@ -230,34 +277,29 @@ fn build(list: Vec<(&String, &SequenceTrie<String, Type>)>, Depth(depth): Depth,
} else if label == "_" {
footer.append_all(quote! {
wild => {
len += wild.len() #plus_1;
len += wild.len() + 1;
#info
#children
}
});
} else {
let pat = pat(label);
let pat = pat(label, string_match);
body.append_all(quote! {
#pat => {
len += #pat.len() #plus_1;
len += #pat.len() + 1;
#info
#children
}
});
}
}

if head.is_empty() && body.is_empty() && footer.is_empty() {
return TokenStream::new();
}

if footer.is_empty() {
let eom = if depth == 0 {
quote! {
val => {
info.len = val.len();
}
}
} else {
quote!(_ => {})
};
footer.append_all(eom);
footer.append_all(quote!(_ => {}));
}

quote! {
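
On the generator side, the new `funcs` function above follows one pattern throughout: walk the suffix tree once and, for each top-level label, emit both a dispatch arm for `lookup` and a dedicated `#[inline]` function, accumulating everything with `append_all`. Below is a minimal standalone sketch of that codegen pattern; the crate names are real (`proc-macro2`, `quote`, `syn`), but the `per_key_functions` helper, the keys, and the generated bodies are made up for illustration.

```rust
// Hypothetical sketch of the "one generated function per key" pattern.
// Build with `proc-macro2`, `quote` and `syn` as dependencies.
use proc_macro2::TokenStream;
use quote::{quote, TokenStreamExt};

fn per_key_functions(keys: &[&str]) -> TokenStream {
    let mut arms = TokenStream::new();
    let mut funcs = TokenStream::new();

    for (i, key) in keys.iter().enumerate() {
        // One identifier per key: `lookup0`, `lookup1`, ...
        let fname = syn::parse_str::<syn::Ident>(&format!("lookup{}", i)).unwrap();

        // A small dispatch arm in the top-level function...
        arms.append_all(quote! {
            #key => #fname(),
        });

        // ...and a separate function holding that key's logic, so no
        // single generated function grows with the size of the list.
        funcs.append_all(quote! {
            #[inline]
            fn #fname() -> usize {
                #key.len()
            }
        });
    }

    quote! {
        fn lookup(label: &str) -> usize {
            match label {
                #arms
                other => other.len(),
            }
        }
        #funcs
    }
}

fn main() {
    // Prints the generated `lookup`, `lookup0` and `lookup1` items.
    println!("{}", per_key_functions(&["com", "uk"]));
}
```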
18 changes: 3 additions & 15 deletions psl/Cargo.toml
@@ -1,7 +1,7 @@
[package]
name = "psl"
description = "A native library for Mozilla's Public Suffix List"
version = "0.3.6"
version = "0.4.0"
license = "MIT/Apache-2.0"
repository = "https://github.com/addr-rs/addr/tree/master/psl"
documentation = "https://docs.rs/psl"
@@ -10,35 +10,23 @@ keywords = ["tld", "gtld", "cctld", "domain", "psl"]
authors = ["rushmorem <[email protected]>"]

[features]
default = ["list", "punycode"]
default = ["list"]
list = ["psl-codegen", "serde"]
punycode = ["list", "psl-codegen/punycode"]
dynamic = [ ]

[dependencies.serde]
version = "1.0"
default-features = false
optional = true

[dependencies.psl-codegen]
version = "0.3"
version = "0.4"
default-features = false
path = "../codegen"
optional = true

[dev-dependencies]
psl-lexer = { version = "0.2", path = "../lexer" }
rspec = "=1.0.0-beta.4"
idna = "0.1"
criterion = "0.2"

[build-dependencies]
rustc_version = "0.2"

[package.metadata.docs.rs]
rustc-args = ["--cfg", "psl_docs_rs"]
rustdoc-args = ["--cfg", "psl_docs_rs"]

[[bench]]
name = "list_benchmark"
harness = false