Allow non-alphanum characters in tag content
The tag content should allow non-alphanum characters. See the spec for
section tag content [1]:

> These tags' content MUST be a non-whitespace character sequence NOT
> containing the current closing delimiter; ...

Of the 3 added test cases, #1 and #3 fail with the current code, and #2
triggers a panic. This change fixes all three.

The expected behavior can also be verified on
http://mustache.github.io/#demo, with Mustache:
1: {{#key*}}{{.}}{{/key*}}
2: {{#key}}{{*}}{{/key}}
3: {{#key}}{{*}*}}{{/key}}

and JSON:
{
  "key*": "value*",
  "key": "value",
  "*": "star",
  "*}*": "fish"
}

We can get output as:
1: value*
2: star
3: fish

[1] https://github.com/mustache/spec/blob/b1329a25e6d265ff360267d23f7c6327bbf59f52/specs/sections.yml#L5
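
The fix can also be exercised directly against this library's internal parser.
A minimal sketch, mirroring the table-driven tests in parse_test.go below and
assuming it lives in the same package as the unexported newLexer/newParser
helpers (plus the standard "testing" import):

    func TestNonAlphanumTags(t *testing.T) {
        for _, template := range []string{
            "{{#key*}}{{.}}{{/key*}}", // section name containing '*'
            "{{#key}}{{*}}{{/key}}",   // variable named '*'
            "{{#key}}{{*}*}}{{/key}}", // variable named '*}*'
        } {
            // Same construction as the existing tests: default "{{" / "}}" delimiters.
            parser := newParser(newLexer(template, "{{", "}}"))
            if _, err := parser.parse(); err != nil {
                t.Errorf("parse(%q): unexpected error: %v", template, err)
            }
        }
    }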
Xuewei Zhang committed Apr 5, 2021
1 parent 8bb9cfc commit b7d834c
Showing 2 changed files with 49 additions and 11 deletions.
30 changes: 19 additions & 11 deletions lex.go
@@ -7,7 +7,6 @@ import (
"bytes"
"fmt"
"strings"
"unicode"
"unicode/utf8"
)

@@ -30,7 +29,7 @@ type tokenType int
const (
tokenError tokenType = iota // error occurred; value is text of error
tokenEOF
tokenIdentifier // alphanumeric identifier
tokenIdentifier // tag identifier: non-whitespace characters NOT containing closing delimiter
tokenLeftDelim // {{ left action delimiter
tokenRightDelim // }} right action delimiter
tokenText // plain text
@@ -279,22 +278,21 @@ func stateTag(l *lexer) stateFn {
l.emit(tokenPartial)
case r == '{':
l.emit(tokenRawStart)
case alphanum(r):
default:
l.backup()
return stateIdent
default:
return l.errorf("unrecognized character in action: %#U", r)
}
return stateTag
}

// stateIdent scans an alphanumeric or field.
// stateIdent scans a partial tag or field.
func stateIdent(l *lexer) stateFn {
Loop:
for {

switch r := l.next(); {
case alphanum(r):
// absorb.
case !whitespace(r) && !isRightDelim(l, r):
// absorb
default:
l.backup()
l.emit(tokenIdentifier)
@@ -366,7 +364,17 @@ func whitespace(r rune) bool {
return false
}

// alphanum reports whether r is an alphabetic, digit, or underscore.
func alphanum(r rune) bool {
return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r)
// isRightDelim reports whether r is a right delimiter.
func isRightDelim(l *lexer, r rune) bool {
length := len(l.rightDelim)
// Right delimiter can be either two characters in the default case "}}", or a single
// character when a custom delimiter is set.
firstRightDelim, _ := utf8.DecodeRuneInString(l.rightDelim)
secondRightDelim, _ := utf8.DecodeRuneInString(l.rightDelim[1:])
if length == 1 && r == firstRightDelim {
return true
} else if length == 2 && r == firstRightDelim && l.peek() == secondRightDelim {
return true
}
return false
}
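
Side note on the isRightDelim check above: a lone right-delimiter character only
closes the tag when the full delimiter follows, which is why {{a}a}} in the new
test below lexes the identifier a}a. A standalone sketch of the same rune-by-rune
comparison (illustration only, not the library's code):

    package main

    import (
        "fmt"
        "unicode/utf8"
    )

    // rightDelimAt reports whether input, starting at byte offset i, begins with
    // delim: a one-byte delimiter matches on its single rune, a two-character
    // delimiter only matches when the second rune follows immediately.
    func rightDelimAt(input, delim string, i int) bool {
        r, size := utf8.DecodeRuneInString(input[i:])
        first, _ := utf8.DecodeRuneInString(delim)
        if len(delim) == 1 {
            return r == first
        }
        second, _ := utf8.DecodeRuneInString(delim[1:])
        next, _ := utf8.DecodeRuneInString(input[i+size:])
        return r == first && next == second
    }

    func main() {
        fmt.Println(rightDelimAt("a}a}}", "}}", 1)) // false: '}' is followed by 'a'
        fmt.Println(rightDelimAt("a}a}}", "}}", 3)) // true: "}}" closes the tag
    }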
30 changes: 30 additions & 0 deletions parse_test.go
@@ -56,6 +56,36 @@ func TestParser(t *testing.T) {
}},
},
},
{
"{{#*}}({{.}}){{/*}}",
[]node{
&sectionNode{"*", false, []node{
textNode("("),
&varNode{".", true},
textNode(")"),
}},
},
},
{
"{{#list}}({{*}}){{/list}}",
[]node{
&sectionNode{"list", false, []node{
textNode("("),
&varNode{"*", true},
textNode(")"),
}},
},
},
{
"{{#list}}({{a}a}}){{/list}}",
[]node{
&sectionNode{"list", false, []node{
textNode("("),
&varNode{"a}a", true},
textNode(")"),
}},
},
},
} {
parser := newParser(newLexer(test.template, "{{", "}}"))
elems, err := parser.parse()
