Skip to content

Commit

Permalink
syntax: do not require peeking two bytes afterecho *
Browse files Browse the repository at this point in the history
The lexer wants to know if literal characters like `*` or `@` are
followed by `(`, because then they would be an extended globbing
expression like `@(pattern-list)`.

However, the current implementation first peeked for the bytes `()`,
to detect function declarations like `@() { ... }`.
Unfortunately, this caused interactive shells to hang on `echo *`
followed by a newline, as a newline is a single character.

To work around the problem, only peek `()` if we first peek `(`.

Moreover, the logic to call Parser.fill in Parser.peekBytes seems wrong.
The conditional definitely needs to use len(s), as that is the number of
bytes we want to look for. The new logic seems clearly better; we call
Parser.fill when p.bs contains fewer remaining bytes than len(s).

Note that we no longer loop on that logic, because we have zero tests
exercising that edge case, and I am slightly worried about endless loops
until we properly test those edge cases.

Fixes #835.
  • Loading branch information
mvdan committed Jul 3, 2022
1 parent b37fb54 commit 64dd233
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 13 deletions.
12 changes: 12 additions & 0 deletions cmd/gosh/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ var interactiveTests = []struct {
"你好\n$ ",
},
},
{
pairs: []string{
"echo *; :\n",
"main.go main_test.go\n$ ",
"echo *\n",
"main.go main_test.go\n$ ",
"shopt -s globstar; echo **\n",
"main.go main_test.go\n$ ",
},
},
{
pairs: []string{
"echo foo; exit 0; echo bar\n",
Expand Down Expand Up @@ -185,9 +195,11 @@ func TestInteractive(t *testing.T) {

line := 1
for len(tc.pairs) > 0 {
t.Logf("write %q", tc.pairs[0])
if _, err := io.WriteString(inWriter, tc.pairs[0]); err != nil {
t.Fatal(err)
}
t.Logf("read %q", tc.pairs[1])
if err := readString(outReader, tc.pairs[1]); err != nil {
t.Fatal(err)
}
Expand Down
35 changes: 22 additions & 13 deletions syntax/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ skipSpace:
p.advanceLitNone(r)
}
case '?', '*', '+', '@', '!':
if p.tokenizeGlob() {
if p.extendedGlob() {
switch r {
case '?':
p.tok = globQuest
Expand Down Expand Up @@ -359,26 +359,35 @@ skipSpace:
}
}

// tokenizeGlob determines whether the expression should be tokenized as a glob literal
func (p *Parser) tokenizeGlob() bool {
// extendedGlob determines whether we're parsing a Bash extended globbing expression.
// For example, whether `*` or `@` are followed by `(` to form `@(foo)`.
func (p *Parser) extendedGlob() bool {
if p.val == "function" {
return false
}
// NOTE: empty pattern list is a valid globbing syntax, eg @()
// but we'll operate on the "likelihood" that it is a function;
// only tokenize if its a non-empty pattern list
if p.peekBytes("()") {
return false
if p.peekByte('(') {
// NOTE: empty pattern list is a valid globbing syntax like `@()`,
// but we'll operate on the "likelihood" that it is a function;
// only tokenize if its a non-empty pattern list.
// We do this after peeking for just one byte, so that the input `echo *`
// followed by a newline does not hang an interactive shell parser until
// another byte is input.
if p.peekBytes("()") {
return false
}
return true
}
return p.peekByte('(')
return false
}

func (p *Parser) peekBytes(s string) bool {
for p.bsp+(len(p.bs)-1) >= len(p.bs) {
peekEnd := p.bsp + len(s)
// TODO: This should loop for slow readers, e.g. those providing one byte at
// a time. Use a loop and test it with testing/iotest.OneByteReader.
if peekEnd > len(p.bs) {
p.fill()
}
bw := p.bsp + len(s)
return bw <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:bw], []byte(s))
return peekEnd <= len(p.bs) && bytes.HasPrefix(p.bs[p.bsp:peekEnd], []byte(s))
}

func (p *Parser) peekByte(b byte) bool {
Expand Down Expand Up @@ -917,7 +926,7 @@ loop:
tok = _Lit
break loop
case '?', '*', '+', '@', '!':
if p.tokenizeGlob() {
if p.extendedGlob() {
tok = _Lit
break loop
}
Expand Down

0 comments on commit 64dd233

Please sign in to comment.