Allow non-alphanum characters in tag content

The tag content should allow non-alphanumeric characters. See the spec for section tag content [1]:

> These tags' content MUST be a non-whitespace character sequence NOT
> containing the current closing delimiter; ...

Of the 3 added test cases, #1 and #3 fail today, and #2 triggers a panic. The code change fixes all of them.

The expected behavior can also be verified on http://mustache.github.io/#demo, with Mustache:

    1: {{#key*}}{{.}}{{/key*}}
    2: {{#key}}{{*}}{{/key}}
    3: {{#key}}{{*}*}}{{/key}}

and JSON:

    { "key*": "value*", "key": "value", "*": "star", "*}*": "fish" }

We can get output as:

    1: value*
    2: star
    3: fish

[1] https://github.com/mustache/spec/blob/b1329a25e6d265ff360267d23f7c6327bbf59f52/specs/sections.yml#L5
observeinc · Jun 10, 2024 · abbac9d · abbac9d
1 parent 4281da4
commit abbac9d
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 18 deletions.
diff --git a/lex.go b/lex.go
@@ -7,7 +7,6 @@ import (
 	"bytes"
 	"fmt"
 	"strings"
-	"unicode"
 	"unicode/utf8"
 )
 
@@ -30,7 +29,7 @@ type tokenType int
 const (
 	tokenError tokenType = iota // error occurred; value is text of error
 	tokenEOF
-	tokenIdentifier     // alphanumeric identifier
+	tokenIdentifier     // tag identifier: non-whitespace characters NOT containing closing delimiter
 	tokenLeftDelim      // {{ left action delimiter
 	tokenRightDelim     // }} right action delimiter
 	tokenText           // plain text
@@ -279,24 +278,22 @@ func stateTag(l *lexer) stateFn {
 		l.emit(tokenPartial)
 	case r == '{':
 		l.emit(tokenRawStart)
-	case alphanum(r):
+	default:
 		l.backup()
 		return stateIdent
-	default:
-		return l.errorf("unrecognized character in action: %#U", r)
 	}
 	return stateTag
 }
 
-// stateIdent scans an alphanumeric or field.
+// stateIdent scans a partial tag or field.
 func stateIdent(l *lexer) stateFn {
 Loop:
 	for {
-		switch r := l.next(); {
-		case alphanum(r):
-			// absorb.
+		switch r := l.peek(); {
+		case !whitespace(r) && !strings.HasPrefix(l.input[l.pos:], l.rightDelim):
+			// absorb
+			l.next()
 		default:
-			l.backup()
 			l.emit(tokenIdentifier)
 			break Loop
 		}
@@ -365,8 +362,3 @@ func whitespace(r rune) bool {
 	}
 	return false
 }
-
-// alphanum reports whether r is an alphabetic, digit, or underscore.
-func alphanum(r rune) bool {
-	return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r)
-}
diff --git a/lex_test.go b/lex_test.go
@@ -27,7 +27,7 @@ func TestLexer(t *testing.T) {
 			},
 		},
 		{
-			"\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{ }}=| {{bar}}",
+			"\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{! !}}=| {{!bar!}}",
 			[]token{
 				{typ: tokenText, val: "\nfoo "},
 				{typ: tokenLeftDelim, val: "{{"},
@@ -42,9 +42,9 @@ func TestLexer(t *testing.T) {
 				{typ: tokenText, val: " "},
 				{typ: tokenSetDelim},
 				{typ: tokenText, val: " "},
-				{typ: tokenLeftDelim, val: "{{"},
+				{typ: tokenLeftDelim, val: "{{!"},
 				{typ: tokenIdentifier, val: "bar"},
-				{typ: tokenRightDelim, val: "}}"},
+				{typ: tokenRightDelim, val: "!}}"},
 				{typ: tokenEOF},
 			},
 		},

diff --git a/parse_test.go b/parse_test.go
@@ -56,6 +56,36 @@ func TestParser(t *testing.T) {
 				}},
 			},
 		},
+		{
+			"{{#*}}({{.}}){{/*}}",
+			[]node{
+				&sectionNode{"*", false, []node{
+					textNode("("),
+					&varNode{".", true},
+					textNode(")"),
+				}},
+			},
+		},
+		{
+			"{{#list}}({{*}}){{/list}}",
+			[]node{
+				&sectionNode{"list", false, []node{
+					textNode("("),
+					&varNode{"*", true},
+					textNode(")"),
+				}},
+			},
+		},
+		{
+			"{{#list}}({{a}a}}){{/list}}",
+			[]node{
+				&sectionNode{"list", false, []node{
+					textNode("("),
+					&varNode{"a}a", true},
+					textNode(")"),
+				}},
+			},
+		},
 	} {
 		parser := newParser(newLexer(test.template, "{{", "}}"))
 		elems, err := parser.parse()