From abbac9dc9a6431b9e97dd77b5fb9be24aa6a3892 Mon Sep 17 00:00:00 2001 From: Xuewei Zhang Date: Mon, 5 Apr 2021 12:37:05 -0700 Subject: [PATCH] Allow non-alphanum characters in tag content The tag content should allow non-alphanum characters. See the spec for section tag content [1]: > These tags' content MUST be a non-whitespace character sequence NOT > containing the current closing delimiter; ... For the 3 added test cases, #1 and #3 will fail today, and #2 will trigger a panic. The code change fixes them. The expected behavior can also be verified on http://mustache.github.io/#demo, with Mustache: 1: {{#key*}}{{.}}{{/key*}} 2: {{#key}}{{*}}{{/key}} 3: {{#key}}{{*}*}}{{/key}} and JSON: { "key*": "value*", "key": "value", "*": "star", "*}*": "fish" } We can get output as: 1: value* 2: star 3: fish [1] https://github.com/mustache/spec/blob/b1329a25e6d265ff360267d23f7c6327bbf59f52/specs/sections.yml#L5 --- lex.go | 22 +++++++--------------- lex_test.go | 6 +++--- parse_test.go | 30 ++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/lex.go b/lex.go index 32fd368..73d9345 100644 --- a/lex.go +++ b/lex.go @@ -7,7 +7,6 @@ import ( "bytes" "fmt" "strings" - "unicode" "unicode/utf8" ) @@ -30,7 +29,7 @@ type tokenType int const ( tokenError tokenType = iota // error occurred; value is text of error tokenEOF - tokenIdentifier // alphanumeric identifier + tokenIdentifier // tag identifier: non-whitespace characters NOT containing closing delimiter tokenLeftDelim // {{ left action delimiter tokenRightDelim // }} right action delimiter tokenText // plain text @@ -279,24 +278,22 @@ func stateTag(l *lexer) stateFn { l.emit(tokenPartial) case r == '{': l.emit(tokenRawStart) - case alphanum(r): + default: l.backup() return stateIdent - default: - return l.errorf("unrecognized character in action: %#U", r) } return stateTag } -// stateIdent scans an alphanumeric or field. +// stateIdent scans a partial tag or field. 
func stateIdent(l *lexer) stateFn { Loop: for { - switch r := l.next(); { - case alphanum(r): - // absorb. + switch r := l.peek(); { + case !whitespace(r) && !strings.HasPrefix(l.input[l.pos:], l.rightDelim): + // absorb + l.next() default: - l.backup() l.emit(tokenIdentifier) break Loop } @@ -365,8 +362,3 @@ func whitespace(r rune) bool { } return false } - -// alphanum reports whether r is an alphabetic, digit, or underscore. -func alphanum(r rune) bool { - return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r) -} diff --git a/lex_test.go b/lex_test.go index a39b7c6..1b2a54f 100644 --- a/lex_test.go +++ b/lex_test.go @@ -27,7 +27,7 @@ func TestLexer(t *testing.T) { }, }, { - "\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{ }}=| {{bar}}", + "\nfoo {{bar}} baz {{=| |=}}\r\n |foo| |={{! !}}=| {{!bar!}}", []token{ {typ: tokenText, val: "\nfoo "}, {typ: tokenLeftDelim, val: "{{"}, @@ -42,9 +42,9 @@ func TestLexer(t *testing.T) { {typ: tokenText, val: " "}, {typ: tokenSetDelim}, {typ: tokenText, val: " "}, - {typ: tokenLeftDelim, val: "{{"}, + {typ: tokenLeftDelim, val: "{{!"}, {typ: tokenIdentifier, val: "bar"}, - {typ: tokenRightDelim, val: "}}"}, + {typ: tokenRightDelim, val: "!}}"}, {typ: tokenEOF}, }, }, diff --git a/parse_test.go b/parse_test.go index 4c6bee7..09ad645 100644 --- a/parse_test.go +++ b/parse_test.go @@ -56,6 +56,36 @@ func TestParser(t *testing.T) { }, }, + { + "{{#*}}({{.}}){{/*}}", + []node{ + &sectionNode{"*", false, []node{ + textNode("("), + &varNode{".", true}, + textNode(")"), + }}, + }, + }, + { + "{{#list}}({{*}}){{/list}}", + []node{ + &sectionNode{"list", false, []node{ + textNode("("), + &varNode{"*", true}, + textNode(")"), + }}, + }, + }, + { + "{{#list}}({{a}a}}){{/list}}", + []node{ + &sectionNode{"list", false, []node{ + textNode("("), + &varNode{"a}a", true}, + textNode(")"), + }}, + }, + }, } { parser := newParser(newLexer(test.template, "{{", "}}")) elems, err := parser.parse()