Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disable all literal optimizations when a pattern is partially anchored. #281

Merged
merged 1 commit into from
Sep 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions src/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,7 @@ impl Compiler {
// matching engine itself.
let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
self.compiled.is_anchored_start = expr.is_anchored_start();
self.compiled.has_anchored_start = expr.has_anchored_start();
self.compiled.is_anchored_end = expr.is_anchored_end();
self.compiled.has_anchored_end = expr.has_anchored_end();
if self.compiled.needs_dotstar() {
dotstar_patch = try!(self.c_dotstar());
self.compiled.start = dotstar_patch.entry;
Expand Down Expand Up @@ -173,10 +171,6 @@ impl Compiler {
exprs.iter().all(|e| e.is_anchored_start());
self.compiled.is_anchored_end =
exprs.iter().all(|e| e.is_anchored_end());
self.compiled.has_anchored_start =
exprs.iter().any(|e| e.has_anchored_start());
self.compiled.has_anchored_end =
exprs.iter().any(|e| e.has_anchored_end());
let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
if self.compiled.needs_dotstar() {
dotstar_patch = try!(self.c_dotstar());
Expand Down
16 changes: 13 additions & 3 deletions src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,13 +222,25 @@ impl ExecBuilder {
.allow_bytes(!self.only_utf8);
let expr = try!(parser.parse(pat));
bytes = bytes || expr.has_bytes();

if !expr.is_anchored_start() && expr.has_anchored_start() {
// Partial anchors unfortunately make it hard to use prefixes,
// so disable them.
prefixes = None;
}
prefixes = prefixes.and_then(|mut prefixes| {
if !prefixes.union_prefixes(&expr) {
None
} else {
Some(prefixes)
}
});

if !expr.is_anchored_end() && expr.has_anchored_end() {
// Partial anchors unfortunately make it hard to use suffixes,
// so disable them.
suffixes = None;
}
suffixes = suffixes.and_then(|mut suffixes| {
if !suffixes.union_suffixes(&expr) {
None
Expand Down Expand Up @@ -1114,9 +1126,7 @@ impl ExecReadOnly {
// create two sets of literals: all of them and then the subset that
// aren't anchored. We would then only search for all of them when at
// the beginning of the input and use the subset in all other cases.
if self.res.len() == 1
&& !self.nfa.has_anchored_start
&& !self.nfa.has_anchored_end {
if self.res.len() == 1 {
if self.nfa.prefixes.complete() {
return if self.nfa.is_anchored_start {
Literal(MatchLiteralType::AnchoredStart)
Expand Down
8 changes: 0 additions & 8 deletions src/prog.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,6 @@ pub struct Program {
pub is_anchored_start: bool,
/// Whether the regex must match at the end of the input.
pub is_anchored_end: bool,
/// Whether the regex has at least one matchable sub-expression that must
/// match from the start of the input.
pub has_anchored_start: bool,
/// Whether the regex has at least one matchable sub-expression that must
/// match at the end of the input.
pub has_anchored_end: bool,
/// Whether this program contains a Unicode word boundary instruction.
pub has_unicode_word_boundary: bool,
/// A possibly empty machine for very quickly matching prefix literals.
Expand Down Expand Up @@ -97,8 +91,6 @@ impl Program {
is_reverse: false,
is_anchored_start: false,
is_anchored_end: false,
has_anchored_start: false,
has_anchored_end: false,
has_unicode_word_boundary: false,
prefixes: LiteralSearcher::empty(),
dfa_size_limit: 2 * (1<<20),
Expand Down
11 changes: 11 additions & 0 deletions tests/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@ macro_rules! findall {

// Macros for automatically producing tests.

// Generates a `#[test]` function called `$name` which checks that matching
// the regex `$re` against `$text` yields the boolean `$ismatch`.
//
// The pattern is built with `regex!` and the haystack with `text!`; neither
// macro is defined here, so both must be in scope wherever `ismatch!` is
// invoked.
//
// On failure the panic message reports the actual and expected results along
// with the source text of the pattern and haystack arguments, so a failing
// regression test can be diagnosed without opening the test file.
macro_rules! ismatch {
    ($name:ident, $re:expr, $text:expr, $ismatch:expr) => {
        #[test]
        fn $name() {
            let text = text!($text);
            let re = regex!($re);
            // Evaluate the expectation exactly once, before matching, so the
            // evaluation order is the same as a plain `a == b` comparison.
            let expected: bool = $ismatch;
            let actual = re.is_match(text);
            assert!(
                expected == actual,
                "is_match returned {} but expected {} for regex `{}` on `{}`",
                actual,
                expected,
                stringify!($re),
                stringify!($text)
            );
        }
    };
}

macro_rules! mat(
($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
#[test]
Expand Down
4 changes: 4 additions & 0 deletions tests/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,7 @@ matiter!(partial_anchor, u!(r"^a|b"), "ba", (0, 1));
// See: https://github.com/rust-lang-nursery/regex/issues/264
// With Unicode mode disabled (`(?-u)`), `\B` is expected to produce an empty
// match at offset 0 of a haystack consisting of the single codepoint
// U+28F3E, both without and with a capture group around the assertion.
mat!(ascii_boundary_no_capture, u!(r"(?-u)\B"), "\u{28f3e}", Some((0, 0)));
mat!(ascii_boundary_capture, u!(r"(?-u)(\B)"), "\u{28f3e}", Some((0, 0)));

// See: https://github.com/rust-lang-nursery/regex/issues/280
// Only one branch of each alternation is anchored. The anchored branch's
// literal `a` occurs in the haystack, but not at the anchored position
// (start for `^a`, end for `a$`), and the unanchored branch `z` never
// occurs, so neither pattern may report a match.
ismatch!(partial_anchor_alternate_begin, u!(r"^a|z"), "yyyyya", false);
ismatch!(partial_anchor_alternate_end, u!(r"a$|z"), "ayyyyy", false);