From 354fbaed6124dd3047e4c19464c98a3369289815 Mon Sep 17 00:00:00 2001
From: riacataquian
Date: Thu, 9 Dec 2021 00:42:39 +0800
Subject: [PATCH] lexer: allow extglob wildcards as function names

the lexer assumes an extglob token if any of the wildcard expressions
(such as `@` and `+`) is followed by a left parenthesis, but that
proves to be an issue if the wildcard is used as a function name.

example input:
```
$ cat in.sh
@() { echo "$@"; }
```

`bash` and `gosh` comparison:
```
$ bash ./in.sh
hello
$ ./gosh in.sh
in.sh:5:1: "}" can only be used to close a block
```

given `in.sh`, gosh reports a syntax error - this is because a closing
brace is found while the lexer isn't assuming a function block

fix the issue by assuming a function if one of the conditions below is
true:

* if the expression is found at the beginning of the statement or if
  it is preceded by a "function"
* if `(` is immediately followed by a `)` - although this is valid bash
  syntax, we'll operate on the likelihood that it is a function

fixes #739
---
 interp/interp_test.go | 14 ++++++++++++++
 syntax/lexer.go       | 35 +++++++++++++++++++++++++++++++++--
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/interp/interp_test.go b/interp/interp_test.go
index fab2b0e1c..36851f172 100644
--- a/interp/interp_test.go
+++ b/interp/interp_test.go
@@ -3117,6 +3117,20 @@ hello, world
 hello, world
 `,
 	},
+	{
+		// globbing wildcard as function name
+		`@() { echo "$@"; }; @ lala; function +() { echo "$@"; }; + foo`,
+		"lala\nfoo\n",
+	},
+	{
+		` @() { echo "$@"; }; @ lala;`,
+		"lala\n",
+	},
+	{
+		// globbing wildcard as function name but with space after the name
+		`+ () { echo "$@"; }; + foo; @ () { echo "$@"; }; @ lala; ? () { echo "$@"; }; ? bar`,
+		"foo\nlala\nbar\n",
+	},
 }
 
 var runTestsWindows = []runTest{
diff --git a/syntax/lexer.go b/syntax/lexer.go
index 3e98b7110..08663318a 100644
--- a/syntax/lexer.go
+++ b/syntax/lexer.go
@@ -290,7 +290,7 @@ skipSpace:
 				p.advanceLitNone(r)
 			}
 		case '?', '*', '+', '@', '!':
-			if p.peekByte('(') {
+			if p.tokenizeGlob() {
 				switch r {
 				case '?':
 					p.tok = globQuest
@@ -346,6 +346,37 @@ skipSpace:
 		}
 	}
 }
+// tokenizeGlob reports whether the wildcard rune just read ('?', '*', '+',
+// '@' or '!') should be lexed as the start of an extended glob pattern rather
+// than as a literal such as a function name.
+func (p *Parser) tokenizeGlob() bool {
+	// A wildcard preceded by the "function" keyword is the function's name.
+	if p.val == "function" {
+		return false
+	}
+	// NOTE: an empty pattern list is valid globbing syntax, e.g. @(),
+	// but we operate on the "likelihood" that it is a function declaration;
+	// only tokenize as a glob if it's a non-empty pattern list.
+	if p.peekBytes("()") {
+		return false
+	}
+	return p.peekByte('(')
+}
+
+// peekBytes reports whether the unread input begins with s, without
+// consuming it.
+func (p *Parser) peekBytes(s string) bool {
+	// Read more input only when fewer than len(s) bytes remain buffered.
+	// NOTE(review): assumes fill keeps the unread bytes and repositions
+	// p.bsp, as the original loop relied on; confirm against fill.
+	if p.bsp+len(s) > len(p.bs) {
+		p.fill()
+	}
+	end := p.bsp + len(s)
+	// <= so that a match ending exactly at the end of the buffer counts.
+	return end <= len(p.bs) && string(p.bs[p.bsp:end]) == s
+}
+
 func (p *Parser) peekByte(b byte) bool {
 	if p.bsp == len(p.bs) {
 		p.fill()
@@ -882,7 +913,7 @@ loop:
 				tok = _Lit
 				break loop
 			case '?', '*', '+', '@', '!':
-				if p.peekByte('(') {
+				if p.tokenizeGlob() {
 					tok = _Lit
 					break loop
 				}