Skip to content

Commit

Permalink
Merge branch 'master' into caseInsensitiveLexerCommand
Browse files Browse the repository at this point in the history
  • Loading branch information
parrt authored Dec 28, 2021
2 parents e8771c1 + 13ba87e commit 71dbd36
Show file tree
Hide file tree
Showing 38 changed files with 373 additions and 446 deletions.
2 changes: 1 addition & 1 deletion doc/actions.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ returnStat : 'return' expr {System.out.println("matched "+$expr.text);} ;
Using a rule label looks like this:

```
returnStat : 'return' e=expr {System.out.println("matched "+e.text);} ;
returnStat : 'return' e=expr {System.out.println("matched "+$e.text);} ;
```

You can also use `$` followed by the name of the attribute to access the value associated with the currently executing rule. For example, `$start` is the starting token of the current rule.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[type]
Lexer

[grammar]
lexer grammar L;
LF : '\\u000A';
X : 'x';

[input]
"""x
"""

[output]
[@0,0:0='x',<2>,1:0]
[@1,1:1='\n',<1>,1:1]
[@2,2:1='<EOF>',<-1>,2:0]

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
[notes]
Loopback doesn't evaluate the predicate at the start of the alternative

[type]
Parser

[grammar]
grammar T;
file_
@after {<ToStringTree("$ctx"):writeln()>}
: para para EOF ;
para: paraContent NL NL ;
paraContent : ('s'|'x'|{<LANotEquals("2",{T<ParserToken("Parser", "NL")>})>}? NL)+ ;
NL : '\n' ;
s : 's' ;
X : 'x' ;

[start]
file_

[input]
"""s


x
"""

[output]
"""(file_ (para (paraContent s) \n \n) (para (paraContent \n x \n)) <EOF>)
"""

[errors]
"""line 5:0 mismatched input '<EOF>' expecting {'s', '\n', 'x'}
"""

[skip]
Cpp
CSharp
Dart
Go
Node
PHP
Python2
Python3
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ public void testActions(String templates, String actionName, String action, Stri
AnalysisPipeline anal = new AnalysisPipeline(g);
anal.process();

CodeGenerator gen = new CodeGenerator(g);
CodeGenerator gen = CodeGenerator.create(g);
ST outputFileST = gen.generateParser(false);
String output = outputFileST.render();
//System.out.println(output);
Expand Down
1 change: 1 addition & 0 deletions tool-testsuite/test/org/antlr/v4/misc/UtilsTest.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.antlr.v4.misc;

import org.antlr.runtime.Token;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.tool.ast.GrammarAST;
import org.junit.Assert;
import org.junit.Test;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ public List<String> getEvalInfoForString(String grammarString, String pattern) t
if (g.isLexer()) factory = new LexerATNFactory((LexerGrammar) g);
g.atn = factory.createATN();

CodeGenerator gen = new CodeGenerator(g);
CodeGenerator gen = CodeGenerator.create(g);
ST outputFileST = gen.generateParser();

// STViz viz = outputFileST.inspect();
Expand Down
110 changes: 67 additions & 43 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestToolSyntaxErrors.java
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,42 @@ public void AllErrorCodesDistinct() {
super.testErrors(pair, true);
}

// Test for https://github.com/antlr/antlr4/issues/2860, https://github.com/antlr/antlr4/issues/1105
@Test public void testEpsilonClosureInLexer() {
	// The (...)+ loop in FRAGMENT contains an empty alternative; the expected tool
	// output is a single EPSILON_CLOSURE error located at T.g4:3:9.
	final String epsilonClosureMessage =
		"error(" + ErrorType.EPSILON_CLOSURE.code + "): T.g4:3:9: rule FRAGMENT contains a closure with at least one alternative that can match an empty string\n";
	final String lexerSource =
		"lexer grammar T;\n" +
		"TOKEN: '\\'' FRAGMENT '\\'';\n" +
		"fragment FRAGMENT: ('x'|)+;";

	// testErrors takes the grammar text followed by the expected messages.
	super.testErrors(new String[] { lexerSource, epsilonClosureMessage }, true);
}

// Test for https://github.com/antlr/antlr4/issues/3359
@Test public void testEofClosure() {
	// Only the EOF* closure in rule EofClosure (line 2) appears in the expected
	// output; no message is expected for the plain EOF alternative on line 3.
	final String eofClosureMessage =
		"error(" + ErrorType.EOF_CLOSURE.code + "): EofClosure.g4:2:0: rule EofClosure contains a closure with at least one alternative that can match EOF\n";
	final String lexerSource =
		"lexer grammar EofClosure;\n" +
		"EofClosure: 'x' EOF*;\n" +
		"EofInAlternative: 'y' ('z' | EOF);";

	// testErrors takes the grammar text followed by the expected messages.
	super.testErrors(new String[] { lexerSource, eofClosureMessage }, true);
}

// Test for https://github.com/antlr/antlr4/issues/1203
@Test public void testEpsilonOptionalAndClosureAnalysis() {
String grammar =
Expand Down Expand Up @@ -452,8 +488,8 @@ public void AllErrorCodesDistinct() {
"lexer grammar A;\n" +
"STRING : '\\\"' '\\\"' 'x' ;";
String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n";
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:10: invalid escape sequence \\\"\n"+
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:15: invalid escape sequence \\\"\n";

String[] pair = new String[] {
grammar,
Expand Down Expand Up @@ -493,10 +529,9 @@ public void AllErrorCodesDistinct() {
"lexer grammar A;\n" +
"RULE : 'Foo \\uAABG \\x \\u';\n";
String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n" +
"warning("+ErrorType.EPSILON_TOKEN.code+"): A.g4:2:0: non-fragment lexer rule RULE can match the empty string\n";
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:12: invalid escape sequence \\uAABG\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:19: invalid escape sequence \\x\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): A.g4:2:22: invalid escape sequence \\u\n";

String[] pair = new String[] {
grammar,
Expand Down Expand Up @@ -536,7 +571,6 @@ public void AllErrorCodesDistinct() {
@Test public void testInvalidCharSetsAndStringLiterals() {
String grammar =
"lexer grammar Test;\n" +
"INVALID_STRING_LITERAL: '\\\"' | '\\]' | '\\u24';\n" +
"INVALID_STRING_LITERAL_RANGE: 'GH'..'LM';\n" +
"INVALID_CHAR_SET: [\\u24\\uA2][\\{];\n" + //https://github.com/antlr/antlr4/issues/1077
"EMPTY_STRING_LITERAL_RANGE: 'F'..'A' | 'Z';\n" +
Expand All @@ -549,21 +583,14 @@ public void AllErrorCodesDistinct() {
"EMPTY_CHAR_SET_WITH_INVALID_ESCAPE_SEQUENCE: [\\'];"; // https://github.com/antlr/antlr4/issues/1556

String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:31: invalid escape sequence \\\"\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:38: invalid escape sequence \\]\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:45: invalid escape sequence \\u24\n" +
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:3:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:30: invalid escape sequence \\u24\\u\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:30: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:40: invalid escape sequence \\{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:40: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:36: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:11:84: invalid escape sequence \\'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:84: string literals and sets cannot be empty: []\n" +
"warning("+ ErrorType.EPSILON_TOKEN.code + "): Test.g4:2:0: non-fragment lexer rule INVALID_STRING_LITERAL can match the empty string\n";
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:30: multi-character literals are not allowed in lexer sets: 'GH'\n" +
"error(" + ErrorType.INVALID_LITERAL_IN_LEXER_SET.code + "): Test.g4:2:36: multi-character literals are not allowed in lexer sets: 'LM'\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:30: invalid escape sequence \\u24\\u\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:40: invalid escape sequence \\{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:33: string literals and sets cannot be empty: 'F'..'A'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:30: string literals and sets cannot be empty: 'f'..'a'\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:36: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:10:84: invalid escape sequence \\'\n";

String[] pair = new String[] {
grammar,
Expand All @@ -587,33 +614,23 @@ public void AllErrorCodesDistinct() {
"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}];\n" +
"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_2: [\\p{Letter}-Z];\n" +
"UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE_3: [A-\\p{Number}];\n" +
"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n";
"INVERTED_UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE: [\\P{Uppercase_Letter}-\\P{Number}];\n" +
"EMOJI_MODIFIER: [\\p{Grapheme_Cluster_Break=E_Base}];\n";

String expected =
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:2:32: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:3:41: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:4:35: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:5:32: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:6:41: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:7:41: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:8:34: string literals and sets cannot be empty: []\n" +
"warning(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:9:43: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:2:32: invalid escape sequence \\u{}\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:3:41: invalid escape sequence \\u{\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:4:35: invalid escape sequence \\u{110\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:5:32: invalid escape sequence \\p{}\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:6:41: invalid escape sequence \\p{\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:7:41: invalid escape sequence \\P{}\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:8:34: invalid escape sequence \\p{NotAProperty}\n" +
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:9:43: invalid escape sequence \\P{NotAProperty}\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:10:39: unicode property escapes not allowed in lexer charset range: [\\p{Uppercase_Letter}-\\p{Lowercase_Letter}]\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:10:39: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:11:41: unicode property escapes not allowed in lexer charset range: [\\p{Letter}-Z]\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:11:41: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:12:41: unicode property escapes not allowed in lexer charset range: [A-\\p{Number}]\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:12:41: string literals and sets cannot be empty: []\n" +
"error(" + ErrorType.UNICODE_PROPERTY_NOT_ALLOWED_IN_RANGE.code + "): Test.g4:13:48: unicode property escapes not allowed in lexer charset range: [\\P{Uppercase_Letter}-\\P{Number}]\n" +
"error(" + ErrorType.EMPTY_STRINGS_AND_SETS_NOT_ALLOWED.code + "): Test.g4:13:48: string literals and sets cannot be empty: []\n";
"error(" + ErrorType.INVALID_ESCAPE_SEQUENCE.code + "): Test.g4:14:16: invalid escape sequence \\p{Grapheme_Cluster_Break=E_Base}\n";

String[] pair = new String[] {
grammar,
Expand Down Expand Up @@ -839,4 +856,11 @@ public void AllErrorCodesDistinct() {

super.testErrors(pair, true);
}

@Test public void testRuleNamesAsTree() {
	// A parser rule named "tree", paired with an empty expected-message string.
	final String[] grammarAndExpected = {
		"grammar T;\n" +
		"tree : 'X';",
		""
	};
	super.testErrors(grammarAndExpected, true);
}
}
44 changes: 16 additions & 28 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestUnicodeEscapes.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,72 +7,60 @@
package org.antlr.v4.test.tool;

import org.antlr.v4.codegen.UnicodeEscapes;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Checks the escaped text {@link UnicodeEscapes} produces for a single code point.
 *
 * <p>Three code points are covered (0x0061, 0xABCD and 0x1F4A9), each against the
 * Java, Python and Swift escape forms. Every test asserts the expected string
 * twice: once through a style-specific {@code append...StyleEscapedCodePoint}
 * call writing into a {@link StringBuilder}, and once through
 * {@link #checkUnicodeEscape(String, int, String)}, which selects the form by
 * target-language name.
 *
 * <p>NOTE(review): both assertion paths verify the same expected value;
 * presumably one of them is superseded by the other. Confirm against
 * {@code UnicodeEscapes} before removing either.
 */
public class TestUnicodeEscapes {
// 0x0061: expected Java form is a four-hex-digit backslash-u escape.
@Test
public void latinJavaEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x0061, sb);
assertEquals("\\u0061", sb.toString());
checkUnicodeEscape("\\u0061", 0x0061, "Java");
}

// 0x0061: the same expected text is checked for both "Python2" and "Python3".
@Test
public void latinPythonEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x0061, sb);
assertEquals("\\u0061", sb.toString());
checkUnicodeEscape("\\u0061", 0x0061, "Python2");
checkUnicodeEscape("\\u0061", 0x0061, "Python3");
}

// 0x0061: expected Swift form wraps the hex digits in braces.
@Test
public void latinSwiftEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x0061, sb);
assertEquals("\\u{0061}", sb.toString());
checkUnicodeEscape("\\u{0061}", 0x0061, "Swift");
}

// 0xABCD: expected Java form is a single four-hex-digit escape.
@Test
public void bmpJavaEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0xABCD, sb);
assertEquals("\\uABCD", sb.toString());
checkUnicodeEscape("\\uABCD", 0xABCD, "Java");
}

// 0xABCD: the same expected text is checked for both "Python2" and "Python3".
@Test
public void bmpPythonEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0xABCD, sb);
assertEquals("\\uABCD", sb.toString());
checkUnicodeEscape("\\uABCD", 0xABCD, "Python2");
checkUnicodeEscape("\\uABCD", 0xABCD, "Python3");
}

// 0xABCD: expected Swift form wraps the hex digits in braces.
@Test
public void bmpSwiftEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0xABCD, sb);
assertEquals("\\u{ABCD}", sb.toString());
checkUnicodeEscape("\\u{ABCD}", 0xABCD, "Swift");
}

// 0x1F4A9 does not fit in four hex digits: the expected Java text is the
// surrogate pair D83D/DCA9 written as two consecutive escapes.
@Test
public void smpJavaEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendJavaStyleEscapedCodePoint(0x1F4A9, sb);
assertEquals("\\uD83D\\uDCA9", sb.toString());
checkUnicodeEscape("\\uD83D\\uDCA9", 0x1F4A9, "Java");
}

// 0x1F4A9: the expected Python text uses the eight-hex-digit capital-U form.
@Test
public void smpPythonEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendPythonStyleEscapedCodePoint(0x1F4A9, sb);
assertEquals("\\U0001F4A9", sb.toString());
checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python2");
checkUnicodeEscape("\\U0001F4A9", 0x1F4A9, "Python3");
}

// 0x1F4A9: the expected Swift text keeps the full code point inside the braces.
@Test
public void smpSwiftEscape() {
StringBuilder sb = new StringBuilder();
UnicodeEscapes.appendSwiftStyleEscapedCodePoint(0x1F4A9, sb);
assertEquals("\\u{1F4A9}", sb.toString());
checkUnicodeEscape("\\u{1F4A9}", 0x1F4A9, "Swift");
}

/**
 * Asserts that {@code UnicodeEscapes.escapeCodePoint(input, language)} returns
 * {@code expected}.
 *
 * @param expected the escaped text the call must produce
 * @param input the code point to escape
 * @param language the target-language name passed through to {@code escapeCodePoint}
 */
private void checkUnicodeEscape(String expected, int input, String language) {
assertEquals(expected, UnicodeEscapes.escapeCodePoint(input, language));
}
}
Loading

0 comments on commit 71dbd36

Please sign in to comment.