From 8542db7d1f8bc4bf0b88e40bf4cae6b47340aa59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= <mvdan@mvdan.cc> Date: Sun, 3 Jul 2022 17:07:20 +0100 Subject: [PATCH] syntax: do not require peeking two bytes after `echo *` The lexer wants to know if literal characters like `*` or `@` are followed by `(`, because then they would be an extended globbing expression like `@(pattern-list)`. However, the current implementation first peeked for the bytes `()`, to detect function declarations like `@() { ... }`. Unfortunately, this caused interactive shells to hang on `echo *` followed by a newline: the newline supplies only a single byte, so the two-byte peek blocked waiting for more input. To work around the problem, only peek for `()` if we first peek a `(`. Moreover, the logic to call Parser.fill in Parser.peekBytes seems wrong. The conditional definitely needs to use len(s), as that is the number of bytes we want to look for. The new logic seems clearly better; we call Parser.fill when p.bs contains fewer remaining bytes than len(s). Note that we no longer loop on that logic, because we have zero tests exercising that edge case, and I am slightly worried about endless loops until we properly test those edge cases. Fixes #835. 
--- cmd/gosh/main_test.go | 12 ++++++++++++ syntax/lexer.go | 35 ++++++++++++++++++++++------------- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/cmd/gosh/main_test.go b/cmd/gosh/main_test.go index 24342da29..e3b00ae38 100644 --- a/cmd/gosh/main_test.go +++ b/cmd/gosh/main_test.go @@ -139,6 +139,16 @@ var interactiveTests = []struct { "你好\n$ ", }, }, + { + pairs: []string{ + "echo *; :\n", + "main.go main_test.go\n$ ", + "echo *\n", + "main.go main_test.go\n$ ", + "shopt -s globstar; echo **\n", + "main.go main_test.go\n$ ", + }, + }, { pairs: []string{ "echo foo; exit 0; echo bar\n", @@ -185,9 +195,11 @@ func TestInteractive(t *testing.T) { line := 1 for len(tc.pairs) > 0 { + t.Logf("write %q", tc.pairs[0]) if _, err := io.WriteString(inWriter, tc.pairs[0]); err != nil { t.Fatal(err) } + t.Logf("read %q", tc.pairs[1]) if err := readString(outReader, tc.pairs[1]); err != nil { t.Fatal(err) } diff --git a/syntax/lexer.go b/syntax/lexer.go index a490d58e4..626b226cd 100644 --- a/syntax/lexer.go +++ b/syntax/lexer.go @@ -303,7 +303,7 @@ skipSpace: p.advanceLitNone(r) } case '?', '*', '+', '@', '!': - if p.tokenizeGlob() { + if p.extendedGlob() { switch r { case '?': p.tok = globQuest @@ -359,26 +359,35 @@ skipSpace: } } -// tokenizeGlob determines whether the expression should be tokenized as a glob literal -func (p *Parser) tokenizeGlob() bool { +// extendedGlob determines whether we're parsing a Bash extended globbing expression. +// For example, whether `*` or `@` are followed by `(` to form `@(foo)`. 
+func (p *Parser) extendedGlob() bool { if p.val == "function" { return false } - // NOTE: empty pattern list is a valid globbing syntax, eg @() - // but we'll operate on the "likelihood" that it is a function; - // only tokenize if its a non-empty pattern list - if p.peekBytes("()") { - return false + if p.peekByte('(') { + // NOTE: empty pattern list is a valid globbing syntax like `@()`, + // but we'll operate on the "likelihood" that it is a function; + // only tokenize if its a non-empty pattern list. + // We do this after peeking for just one byte, so that the input `echo *` + // followed by a newline does not hang an interactive shell parser until + // another byte is input. + if p.peekBytes("()") { + return false + } + return true } - return p.peekByte('(') + return false } func (p *Parser) peekBytes(s string) bool { - for p.bsp+(len(p.bs)-1) >= len(p.bs) { + peekEnd := p.bsp + len(s) + // TODO: This should loop for slow readers, e.g. those providing one byte at + // a time. Use a loop and test it with testing/iotest.OneByteReader. + if peekEnd > len(p.bs) { p.fill() } - bw := p.bsp + len(s) - return bw <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:bw], []byte(s)) + return peekEnd <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:peekEnd], []byte(s)) } func (p *Parser) peekByte(b byte) bool { @@ -917,7 +926,7 @@ loop: tok = _Lit break loop case '?', '*', '+', '@', '!': - if p.tokenizeGlob() { + if p.extendedGlob() { tok = _Lit break loop }