Skip to content

Commit

Permalink
Merge pull request #69 from ammar/handle_chained_quantifiers
Browse files Browse the repository at this point in the history
Add simple handling/workaround for chained quantifiers
  • Loading branch information
jaynetics authored Nov 25, 2020
2 parents 76ba1cb + f14a36b commit ee57858
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 15 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@

- `Regexp::Expression::Base#base_length`
* returns the character count of an expression body, ignoring any quantifier
- pragmatic, experimental support for chained quantifiers
* e.g.: `/^a{10}{4,6}$/` matches exactly 40, 50 or 60 `a`s
* successive quantifiers used to be silently dropped by the parser
* they are now wrapped with passive groups as if they were written `(?:a{10}){4,6}`
* thanks to [calfeld](https://github.com/calfeld) for reporting this a while back

### Fixed

Expand Down
19 changes: 17 additions & 2 deletions lib/regexp_parser/expression/classes/group.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,24 @@ def capturing?; false end
def comment?; false end
end

class Atomic < Group::Base; end
class Passive < Group::Base; end
# Passive group. An instance can be flagged as "implicit", which the parser
# does when it wraps the target of a chained quantifier in a group that was
# never written in the pattern.
class Passive < Group::Base
  # Flag set from outside (e.g. `group.implicit = true`).
  attr_writer :implicit

  # True when this group has been flagged as implicit.
  # `||=` also initializes the flag to false the first time it is read unset.
  def implicit?
    @implicit ||= false
  end

  # Implicit groups render as their joined sub-expressions followed by their
  # own quantifier affix; every other group defers to the inherited #to_s.
  def to_s(format = :full)
    return super unless implicit?

    "#{expressions.join}#{quantifier_affix(format)}"
  end
end

# Absence and Atomic add nothing of their own here; they only subclass Group::Base.
class Absence < Group::Base; end
class Atomic < Group::Base; end
# Options additionally exposes a readable and writable `option_changes` attribute.
class Options < Group::Base
attr_accessor :option_changes
end
Expand Down
9 changes: 9 additions & 0 deletions lib/regexp_parser/expression/quantifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,14 @@ def #{mode}?
RUBY
end
alias :lazy? :reluctant?

# Value equality: two objects are equal when they are instances of exactly
# the same class and agree on token, mode, min and max.
def ==(other)
  return false unless other.class == self.class
  return false unless other.token == token && other.mode == mode

  other.min == min && other.max == max
end
# NOTE(review): the alias is named `eq`, not `eql?` - confirm this is intended.
alias :eq :==
end
end
27 changes: 27 additions & 0 deletions lib/regexp_parser/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,28 @@ def quantifier(token)
target_node || raise(ArgumentError, 'No valid target found for '\
"'#{token.text}' ")

# in case of chained quantifiers, wrap target in an implicit passive group
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
if target_node.quantified?
new_token = Regexp::Token.new(
:group,
:passive,
'', # text
target_node.ts,
nil, # te (unused)
target_node.level,
target_node.set_level,
target_node.conditional_level
)
new_group = Group::Passive.new(new_token, active_opts)
new_group.implicit = true
new_group << target_node
increase_level(target_node)
node.expressions[offset] = new_group
target_node = new_group
end

case token.token
when :zero_or_one
target_node.quantify(:zero_or_one, token.text, 0, 1, :greedy)
Expand Down Expand Up @@ -468,6 +490,11 @@ def quantifier(token)
end
end

# Bumps `level` by one on the given expression and, when the expression can
# be iterated with #each, recursively on every sub-expression it yields.
# Returns false for expressions that do not respond to #each.
def increase_level(exp)
  exp.level += 1
  return false unless exp.respond_to?(:each)

  exp.each { |child| increase_level(child) }
end

def interval(target_node, token)
text = token.text
mchr = text[text.length-1].chr =~ /[?+]/ ? text[text.length-1].chr : nil
Expand Down
7 changes: 7 additions & 0 deletions spec/expression/to_s_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,13 @@
expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
end

# special case: implicit groups used for chained quantifiers produce no parens
specify 'chained quantifiers #to_s' do
pattern = /a+{1}{2}/
root = RP.parse(pattern)
expect(root.to_s).to eq 'a+{1}{2}'
end

# regression test for https://github.com/ammar/regexp_parser/issues/74
specify('non-ascii comment') do
pattern = '(?x) 😋 # 😋'
Expand Down
15 changes: 15 additions & 0 deletions spec/parser/quantifiers_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,21 @@
include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4

# special case: exps with chained quantifiers are wrapped in implicit passive groups
include_examples 'parse', /a+{2}{3}/,
0 => [
:group, :passive, Group::Passive, implicit?: true, level: 0,
quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
],
[0, 0] => [
:group, :passive, Group::Passive, implicit?: true, level: 1,
quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
],
[0, 0, 0] => [
:literal, :literal, Literal, text: 'a', level: 2,
quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
]

specify('mode-checking methods') do
exp = RP.parse(/a??/).first

Expand Down
31 changes: 18 additions & 13 deletions spec/scanner/quantifiers_spec.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
require 'spec_helper'

RSpec.describe('Quantifier scanning') do
include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]

include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]

include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]

include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]

# special case: chained quantifiers
include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
end

0 comments on commit ee57858

Please sign in to comment.