From d5fe45cd0886954d287d3c8d26b95faf526fa374 Mon Sep 17 00:00:00 2001 From: Xuewei Zhang Date: Mon, 5 Apr 2021 12:37:05 -0700 Subject: [PATCH] Allow non-alphanum characters in tag content The tag content should allow non-alphanum characters. See the spec for section tag content [1]: > These tags' content MUST be a non-whitespace character sequence NOT > containing the current closing delimiter; ... For the 3 added test cases, #1 and #3 will fail today, and #2 will trigger a panic. The code change fixes them. The expected behavior can also be verified on http://mustache.github.io/#demo, with Mustache: 1: {{#key*}}{{.}}{{/key*}} 2: {{#key}}{{*}}{{/key}} 3: {{#key}}{{*}*}}{{/key}} and JSON: { "key*": "value*", "key": "value", "*": "star", "*}*": "fish" } We can get output as: 1: value* 2: star 3: fish [1] https://github.com/mustache/spec/blob/b1329a25e6d265ff360267d23f7c6327bbf59f52/specs/sections.yml#L5 --- lex.go | 22 +++++++--------------- lex_test.go | 6 +++--- parse_test.go | 30 ++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/lex.go b/lex.go index 32fd368..73d9345 100644 --- a/lex.go +++ b/lex.go @@ -7,7 +7,6 @@ import ( "bytes" "fmt" "strings" - "unicode" "unicode/utf8" ) @@ -30,7 +29,7 @@ type tokenType int const ( tokenError tokenType = iota // error occurred; value is text of error tokenEOF - tokenIdentifier // alphanumeric identifier + tokenIdentifier // tag identifier: non-whitespace characters NOT containing closing delimiter tokenLeftDelim // {{ left action delimiter tokenRightDelim // }} right action delimiter tokenText // plain text @@ -279,24 +278,22 @@ func stateTag(l *lexer) stateFn { l.emit(tokenPartial) case r == '{': l.emit(tokenRawStart) - case alphanum(r): + default: l.backup() return stateIdent - default: - return l.errorf("unrecognized character in action: %#U", r) } return stateTag } -// stateIdent scans an alphanumeric or field. +// stateIdent scans an partial tag or field. 
func stateIdent(l *lexer) stateFn { Loop: for { - switch r := l.next(); { - case alphanum(r): - // absorb. + switch r := l.peek(); { + case !whitespace(r) && !strings.HasPrefix(l.input[l.pos:], l.rightDelim): + // absorb + l.next() default: - l.backup() l.emit(tokenIdentifier) break Loop } @@ -365,8 +362,3 @@ func whitespace(r rune) bool { } return false } - -// alphanum reports whether r is an alphabetic, digit, or underscore. -func alphanum(r rune) bool { - return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r) -} diff --git a/lex_test.go b/lex_test.go index a39b7c6..1b2a54f 100644 --- a/lex_test.go +++ b/lex_test.go @@ -27,7 +27,7 @@ func TestLexer(t *testing.T) { }, }, { - "\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{ }}=| {{bar}}", + "\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{! !}}=| {{!bar!}}", []token{ {typ: tokenText, val: "\nfoo "}, {typ: tokenLeftDelim, val: "{{"}, @@ -42,9 +42,9 @@ func TestLexer(t *testing.T) { {typ: tokenText, val: " "}, {typ: tokenSetDelim}, {typ: tokenText, val: " "}, - {typ: tokenLeftDelim, val: "{{"}, + {typ: tokenLeftDelim, val: "{{!"}, {typ: tokenIdentifier, val: "bar"}, - {typ: tokenRightDelim, val: "}}"}, + {typ: tokenRightDelim, val: "!}}"}, {typ: tokenEOF}, }, }, diff --git a/parse_test.go b/parse_test.go index 4c6bee7..09ad645 100644 --- a/parse_test.go +++ b/parse_test.go @@ -56,6 +56,36 @@ func TestParser(t *testing.T) { }}, }, }, + { + "{{#*}}({{.}}){{/*}}", + []node{ + &sectionNode{"*", false, []node{ + textNode("("), + &varNode{".", true}, + textNode(")"), + }}, + }, + }, + { + "{{#list}}({{*}}){{/list}}", + []node{ + &sectionNode{"list", false, []node{ + textNode("("), + &varNode{"*", true}, + textNode(")"), + }}, + }, + }, + { + "{{#list}}({{a}a}}){{/list}}", + []node{ + &sectionNode{"list", false, []node{ + textNode("("), + &varNode{"a}a", true}, + textNode(")"), + }}, + }, + }, } { parser := newParser(newLexer(test.template, "{{", "}}")) elems, err := parser.parse()