From 354fbaed6124dd3047e4c19464c98a3369289815 Mon Sep 17 00:00:00 2001
From: riacataquian <icataquian@gmail.com>
Date: Thu, 9 Dec 2021 00:42:39 +0800
Subject: [PATCH] lexer: allow extglob wildcards as function names

the lexer assumes an extglob token whenever one of the wildcard characters
(such as `@` and `+`) is followed by a left parenthesis, but that
proves to be an issue if the wildcard is used as a function name.

example input:
```
$ cat in.sh
@() {
  echo "$@";
}
```

`bash` and `gosh` comparison:
```
$ bash ./in.sh
hello
$ ./gosh in.sh
in.sh:5:1: "}" can only be used to close a block
```

given `in.sh`, gosh reports a syntax error - this is because
a closing brace is found while the lexer isn't assuming a function
block

fix the issue by assuming a function if one of the conditions below is
true:
* if the expression is found at the beginning of the statement or if it is
  preceded by a "function"
* if `(` is immediately followed by a `)` - although this is valid
  bash syntax, we'll operate on the likelihood that it is a function

fixes #739
---
 interp/interp_test.go | 14 ++++++++++++++
 syntax/lexer.go       | 26 ++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/interp/interp_test.go b/interp/interp_test.go
index fab2b0e1c..36851f172 100644
--- a/interp/interp_test.go
+++ b/interp/interp_test.go
@@ -3117,6 +3117,20 @@ hello, world
 hello, world
 `,
 	},
+	{
+		// globbing wildcard as function name
+		`@() { echo "$@"; }; @ lala; function +() { echo "$@"; }; + foo`,
+		"lala\nfoo\n",
+	},
+	{
+		`      @() { echo "$@"; }; @ lala;`,
+		"lala\n",
+	},
+	{
+		// globbing wildcard as function name but with space after the name
+		`+ () { echo "$@"; }; + foo; @ () { echo "$@"; }; @ lala; ? () { echo "$@"; }; ? bar`,
+		"foo\nlala\nbar\n",
+	},
 }
 
 var runTestsWindows = []runTest{
diff --git a/syntax/lexer.go b/syntax/lexer.go
index 3e98b7110..08663318a 100644
--- a/syntax/lexer.go
+++ b/syntax/lexer.go
@@ -290,7 +290,7 @@ skipSpace:
 				p.advanceLitNone(r)
 			}
 		case '?', '*', '+', '@', '!':
-			if p.peekByte('(') {
+			if p.tokenizeGlob() {
 				switch r {
 				case '?':
 					p.tok = globQuest
@@ -346,6 +346,28 @@ skipSpace:
 	}
 }
 
+// tokenizeGlob reports whether the wildcard just read should be lexed as an extended glob token.
+func (p *Parser) tokenizeGlob() bool {
+	if p.val == "function" {
+		return false // the wildcard is the name in a "function name()" declaration
+	}
+	// NOTE: an empty pattern list such as @() is valid extglob syntax, but
+	// it is far more likely to be a function declaration, so only a
+	// non-empty pattern list is tokenized as a glob.
+	if p.peekBytes("()") {
+		return false
+	}
+	return p.peekByte('(')
+}
+
+// peekBytes reports whether the unread input starts with s.
+func (p *Parser) peekBytes(s string) bool {
+	if p.bsp+len(s) > len(p.bs) {
+		p.fill() // single top-up: once input is exhausted a loop here would never exit
+	}
+	return p.bsp <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:], []byte(s))
+}
+
 func (p *Parser) peekByte(b byte) bool {
 	if p.bsp == len(p.bs) {
 		p.fill()
@@ -882,7 +904,7 @@ loop:
 			tok = _Lit
 			break loop
 		case '?', '*', '+', '@', '!':
-			if p.peekByte('(') {
+			if p.tokenizeGlob() {
 				tok = _Lit
 				break loop
 			}