Skip to content

Commit

Permalink
regexp/syntax: reject very deeply nested regexps in Parse
Browse files Browse the repository at this point in the history
The regexp code assumes it can recurse over the structure of
a regexp safely. Go's growable stacks make that reasonable
for all plausible regexps, but implausible ones can reach the
“infinite recursion?” stack limit.

This CL limits the depth of any parsed regexp to 1000.
That is, the depth of the parse tree is required to be ≤ 1000.
Regexps that require deeper parse trees will return ErrInternalError.
A future CL will change the error to ErrInvalidDepth,
but using ErrInternalError for now avoids introducing new API
in point releases when this is backported.

Fixes #51112.

Change-Id: I97d2cd82195946eb43a4ea8561f5b95f91fb14c5
Reviewed-on: https://go-review.googlesource.com/c/go/+/384616
Trust: Russ Cox <[email protected]>
Run-TryBot: Russ Cox <[email protected]>
Reviewed-by: Ian Lance Taylor <[email protected]>
  • Loading branch information
rsc committed Feb 10, 2022
1 parent 656d3f4 commit 452f24a
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 2 deletions.
72 changes: 70 additions & 2 deletions src/regexp/syntax/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,29 @@ const (
opVerticalBar
)

// maxHeight is the maximum height of a regexp parse tree.
// It is somewhat arbitrarily chosen, but the idea is to be large enough
// that no one will actually hit in real use but at the same time small enough
// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
// The maximum amount of stack for a single recursive frame is probably
// closer to 1kB, so this could potentially be raised, but it seems unlikely
// that people have regexps nested even this deeply.
// We ran a test on Google's C++ code base and turned up only
// a single use case with depth > 100; it had depth 128.
// Using depth 1000 should be plenty of margin.
// As an optimization, we don't even bother calculating heights
// until we've allocated at least maxHeight Regexp structures.
const maxHeight = 1000

type parser struct {
flags Flags // parse mode flags
stack []*Regexp // stack of parsed expressions
free *Regexp
numCap int // number of capturing groups seen
wholeRegexp string
tmpClass []rune // temporary char class work space
tmpClass []rune // temporary char class work space
numRegexp int // number of regexps allocated
height map[*Regexp]int // regexp height for height limit check
}

func (p *parser) newRegexp(op Op) *Regexp {
Expand All @@ -92,16 +108,52 @@ func (p *parser) newRegexp(op Op) *Regexp {
*re = Regexp{}
} else {
re = new(Regexp)
p.numRegexp++
}
re.Op = op
return re
}

func (p *parser) reuse(re *Regexp) {
if p.height != nil {
delete(p.height, re)
}
re.Sub0[0] = p.free
p.free = re
}

func (p *parser) checkHeight(re *Regexp) {
if p.numRegexp < maxHeight {
return
}
if p.height == nil {
p.height = make(map[*Regexp]int)
for _, re := range p.stack {
p.checkHeight(re)
}
}
if p.calcHeight(re, true) > maxHeight {
panic(ErrInternalError)
}
}

func (p *parser) calcHeight(re *Regexp, force bool) int {
if !force {
if h, ok := p.height[re]; ok {
return h
}
}
h := 1
for _, sub := range re.Sub {
hsub := p.calcHeight(sub, false)
if h < 1+hsub {
h = 1 + hsub
}
}
p.height[re] = h
return h
}

// Parse stack manipulation.

// push pushes the regexp re onto the parse stack and returns the regexp.
Expand Down Expand Up @@ -137,6 +189,7 @@ func (p *parser) push(re *Regexp) *Regexp {
}

p.stack = append(p.stack, re)
p.checkHeight(re)
return re
}

Expand Down Expand Up @@ -246,6 +299,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
p.checkHeight(re)

if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
Expand Down Expand Up @@ -693,6 +747,21 @@ func literalRegexp(s string, flags Flags) *Regexp {
// Flags, and returns a regular expression parse tree. The syntax is
// described in the top-level comment.
func Parse(s string, flags Flags) (*Regexp, error) {
return parse(s, flags)
}

func parse(s string, flags Flags) (_ *Regexp, err error) {
defer func() {
switch r := recover(); r {
default:
panic(r)
case nil:
// ok
case ErrInternalError:
err = &Error{Code: ErrInternalError, Expr: s}
}
}()

if flags&Literal != 0 {
// Trivial parser for literal string.
if err := checkUTF8(s); err != nil {
Expand All @@ -704,7 +773,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
// Otherwise, must do real work.
var (
p parser
err error
c rune
op Op
lastRepeat string
Expand Down
7 changes: 7 additions & 0 deletions src/regexp/syntax/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,11 @@ var parseTests = []parseTest{
// Valid repetitions.
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},

// Valid nesting.
{strings.Repeat("(", 999) + strings.Repeat(")", 999), ``},
{strings.Repeat("(?:", 999) + strings.Repeat(")*", 999), ``},
{"(" + strings.Repeat("|", 12345) + ")", ``}, // not nested at all
}

const testFlags = MatchNL | PerlX | UnicodeGroups
Expand Down Expand Up @@ -482,6 +487,8 @@ var invalidRegexps = []string{
`a{100000}`,
`a{100000,}`,
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
`\Q\E*`,
}

Expand Down

0 comments on commit 452f24a

Please sign in to comment.