Skip to content

Commit

Permalink
Introduce caseInsensitive lexer rule option, fixes #3436
Browse files Browse the repository at this point in the history
  • Loading branch information
KvanTTT committed Dec 28, 2021
1 parent 13ba87e commit a853356
Show file tree
Hide file tree
Showing 16 changed files with 254 additions and 148 deletions.
13 changes: 13 additions & 0 deletions doc/lexer-rules.md
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,16 @@ As of 4.5, you can also define channel names like enumerations with the followin
```
channels { WSCHANNEL, MYHIDDEN }
```
## Lexer Rule Options
### caseInsensitive
Defines whether the current lexer rule is case-insensitive.
The argument can be `true` or `false`.
The option overrides the grammar-level `caseInsensitive` option value if one is defined.
```g4
options { caseInsensitive=true; }
STRING options { caseInsensitive=false; } : 'N'? '\'' (~'\'' | '\'\'')* '\''; // lowercase 'n' is not allowed
```
9 changes: 2 additions & 7 deletions doc/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,9 @@ The mechanism works by automatically transforming grammar references to characte

## Rule Options

There are currently no valid rule-level options, but the tool still supports the following syntax for future use:
### caseInsensitive

```
rulename
options {...}
: ...
;
```
The tool supports the `caseInsensitive` lexer rule option, which is described in [lexer-rules.md](lexer-rules.md#caseinsensitive).

## Rule Element Options

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,26 @@ public void testSetUp() throws Exception {
checkLexerMatches(lg, inputString, "TOKEN, EOF");
}

/**
 * Builds a lexer grammar whose {@code TOKEN1} rule carries the rule-level option
 * {@code caseInsensitive=true} over the lower-case set {@code [a-f]}, then checks
 * that the upper-case input {@code ABCDEF} is reported as {@code TOKEN1, EOF}.
 */
@Test public void testCaseInsensitiveInLexerRule() throws Exception {
	String grammarText =
		"lexer grammar L;\n" +
		"TOKEN1 options { caseInsensitive=true; } : [a-f]+;\n" +
		"WS: [ ]+ -> skip;";
	LexerGrammar lexerGrammar = new LexerGrammar(grammarText);

	// The rule is written in lower case only; the input is entirely upper case.
	String upperCaseInput = "ABCDEF";
	String expectedTokens = "TOKEN1, EOF";
	checkLexerMatches(lexerGrammar, upperCaseInput, expectedTokens);
}

/**
 * Runs a lexer generated from a grammar that sets {@code caseInsensitive=true} at the
 * grammar level but {@code caseInsensitive=false} on the {@code STRING} rule, feeds it
 * {@code n'sample'} and expects a token recognition error for the leading {@code n}.
 */
@Test public void testCaseInsensitiveInLexerRuleOverridesGlobalValue() {
	String grammarHeader =
		"lexer grammar L;\n" +
		"options { caseInsensitive=true; }\n";
	// The rule-level option switches case-insensitivity back off for STRING only.
	String stringRule =
		"STRING options { caseInsensitive=false; } : 'N'? '\\'' (~'\\'' | '\\'\\'')* '\\'';\n";
	String grammar = grammarHeader + stringRule;

	String input = "n'sample'";
	execLexer("L.g4", grammar, "L", input);

	String expectedErrors = "line 1:0 token recognition error at: 'n'\n";
	assertEquals(expectedErrors, getParseErrors());
}

protected void checkLexerMatches(LexerGrammar lg, String inputString, String expecting) {
ATN atn = createATN(lg, true);
CharStream input = CharStreams.fromString(inputString);
Expand Down
38 changes: 35 additions & 3 deletions tool-testsuite/test/org/antlr/v4/test/tool/TestSymbolIssues.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,45 @@ public void testLabelsForTokensWithMixedTypesLRWithoutLabels() {
testErrors(test, false);
}

@Test public void testIllegalModeOption() {
@Test public void testIllegalCaseInsensitiveOptionValue() {
String[] test = {
"lexer grammar L;\n" +
"options { caseInsensitive = badValue; }\n" +
"DEFAULT_TOKEN: [A-F]+;\n",
"TOKEN_1 options { caseInsensitive = badValue; } : [A-F]+;\n",

"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n"
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:2:28: unsupported option value caseInsensitive=badValue\n" +
"warning(" + ErrorType.ILLEGAL_OPTION_VALUE.code + "): L.g4:3:36: unsupported option value caseInsensitive=badValue\n"
};

testErrors(test, false);
}

/**
 * For each of the values {@code true} and {@code false}, builds a lexer grammar in which
 * the grammar-level and the rule-level {@code caseInsensitive} options carry the same
 * value and expects the {@code REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION} warning
 * at position 3:16 naming that value.
 */
@Test public void testRedundantCaseInsensitiveLexerRuleOption() {
	String warningHead =
		"warning(" + ErrorType.REDUNDANT_CASE_INSENSITIVE_LEXER_RULE_OPTION.code + "): L.g4:3:16: ";

	// Same scenario twice: once with both options "true", once with both "false".
	for (String value : new String[] { "true", "false" }) {
		String grammar =
			"lexer grammar L;\n" +
			"options { caseInsensitive = " + value + "; }\n" +
			"TOKEN options { caseInsensitive = " + value + "; } : [A-F]+;\n";

		String expectedWarning = warningHead +
			"caseInsensitive lexer rule option is redundant because its value equals to global value (" +
			value + ")\n";

		String[] grammarAndExpectedErrors = { grammar, expectedWarning };
		testErrors(grammarAndExpectedErrors, false);
	}
}

@Test public void testCaseInsensitiveOptionInParseRule() {
String[] test = {
"grammar G;\n" +
"root options { caseInsensitive=true; } : 'token';",

"warning(" + ErrorType.ILLEGAL_OPTION.code + "): G.g4:2:15: unsupported option caseInsensitive\n"
};

testErrors(test, false);
Expand Down
55 changes: 21 additions & 34 deletions tool/src/org/antlr/v4/automata/LexerATNFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ public class LexerATNFactory extends ParserATNFactory {

private final List<String> ruleCommands = new ArrayList<String>();

private final boolean caseInsensitive;

/**
* Maps from an action index to a {@link LexerAction} object.
*/
Expand All @@ -77,8 +75,6 @@ public LexerATNFactory(LexerGrammar g) {
public LexerATNFactory(LexerGrammar g, CodeGenerator codeGenerator) {
super(g);
// use codegen to get correct language templates for lexer commands
String caseInsensitiveOption = g.getOptionString("caseInsensitive");
caseInsensitive = caseInsensitiveOption != null && caseInsensitiveOption.equals("true");
codegenTemplates = (codeGenerator == null ? CodeGenerator.create(g) : codeGenerator).getTemplates();
}

Expand Down Expand Up @@ -192,51 +188,42 @@ public Handle lexerAltCommands(Handle alt, Handle cmds) {

@Override
public Handle lexerCallCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments == null || !cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
return action(cmdST.render());
return lexerCallCommandOrCommand(ID, arg);
}

@Override
public Handle lexerCommand(GrammarAST ID) {
LexerAction lexerAction = createLexerAction(ID, null);
return lexerCallCommandOrCommand(ID, null);
}

private Handle lexerCallCommandOrCommand(GrammarAST ID, GrammarAST arg) {
LexerAction lexerAction = createLexerAction(ID, arg);
if (lexerAction != null) {
return action(ID, lexerAction);
}

// fall back to standard action generation for the command
ST cmdST = codegenTemplates.getInstanceOf("Lexer" +
CharSupport.capitalize(ID.getText()) +
CharSupport.capitalize(ID.getText())+
"Command");
if (cmdST == null) {
g.tool.errMgr.grammarError(ErrorType.INVALID_LEXER_COMMAND, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg")) {
g.tool.errMgr.grammarError(ErrorType.MISSING_LEXER_COMMAND_ARGUMENT, g.fileName, ID.token, ID.getText());
boolean callCommand = arg != null;
boolean containsArg = cmdST.impl.formalArguments != null && cmdST.impl.formalArguments.containsKey("arg");
if (callCommand != containsArg) {
ErrorType errorType = callCommand ? ErrorType.UNWANTED_LEXER_COMMAND_ARGUMENT : ErrorType.MISSING_LEXER_COMMAND_ARGUMENT;
g.tool.errMgr.grammarError(errorType, g.fileName, ID.token, ID.getText());
return epsilon(ID);
}

if (callCommand) {
cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
}

return action(cmdST.render());
}

Expand Down Expand Up @@ -264,7 +251,7 @@ public Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean inver
int a = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(0).getText());
int b = CharSupport.getCharValueFromGrammarCharLiteral(t.getChild(1).getText());
if (checkRange((GrammarAST)t.getChild(0), (GrammarAST)t.getChild(1), a, b)) {
checkRangeAndAddToSet(associatedAST, t, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(associatedAST, t, set, a, b, currentRule.caseInsensitive, null);
}
}
else if ( t.getType()==ANTLRParser.LEXER_CHAR_SET ) {
Expand Down Expand Up @@ -553,11 +540,11 @@ private void applyPrevState(GrammarAST charSetAST, IntervalSet set, CharSetParse
}

private void checkCharAndAddToSet(GrammarAST ast, IntervalSet set, int c) {
checkRangeAndAddToSet(ast, ast, set, c, c, caseInsensitive, null);
checkRangeAndAddToSet(ast, ast, set, c, c, currentRule.caseInsensitive, null);
}

private void checkRangeAndAddToSet(GrammarAST mainAst, IntervalSet set, int a, int b) {
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, caseInsensitive, null);
checkRangeAndAddToSet(mainAst, mainAst, set, a, b, currentRule.caseInsensitive, null);
}

private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, GrammarAST ast, IntervalSet set, int a, int b, boolean caseInsensitive, CharactersDataCheckStatus previousStatus) {
Expand Down Expand Up @@ -616,7 +603,7 @@ private CharactersDataCheckStatus checkRangeAndAddToSet(GrammarAST rootAst, Gram

private Transition createTransition(ATNState target, int from, int to, CommonTree tree) {
RangeBorderCharactersData charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, g, tree, true);
if (caseInsensitive) {
if (currentRule.caseInsensitive) {
if (charactersData.isSingleRange()) {
return CodePointTransitions.createWithCodePointRange(target, from, to);
}
Expand Down
24 changes: 18 additions & 6 deletions tool/src/org/antlr/v4/parse/ANTLRLexer.g
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,26 @@ import org.antlr.v4.runtime.misc.Interval;
* Return token or null if for some reason we can't find the start.
*/
public Token getRuleOrSubruleStartToken() {
if ( tokens==null ) return null;
if (tokens == null) return null;
int i = tokens.index();
int n = tokens.size();
if ( i>=n ) i = n-1; // seems index == n as we lex
while ( i>=0 && i<n) {
int n = tokens.size();
if (i >= n) i = n - 1; // seems index == n as we lex
boolean withinOptionsBlock = false;
while (i >= 0 && i < n) {
int ttype = tokens.get(i).getType();
if ( ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF ) {
return tokens.get(i);
if (withinOptionsBlock) {
// Ignore rule options content
if (ttype == OPTIONS) {
withinOptionsBlock = false;
}
}
else {
if (ttype == RBRACE) {
withinOptionsBlock = true;
}
else if (ttype == LPAREN || ttype == TOKEN_REF || ttype == RULE_REF) {
return tokens.get(i);
}
}
i--;
}
Expand Down
8 changes: 6 additions & 2 deletions tool/src/org/antlr/v4/parse/ANTLRParser.g
Original file line number Diff line number Diff line change
Expand Up @@ -517,9 +517,13 @@ lexerRule
paraphrases.pop();
}
: FRAGMENT?
TOKEN_REF COLON lexerRuleBlock SEMI
TOKEN_REF
optionsSpec?
COLON lexerRuleBlock SEMI
-> ^( RULE<RuleAST> TOKEN_REF
^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
^(RULEMODIFIERS FRAGMENT)? optionsSpec? lexerRuleBlock
)
;
Expand Down
5 changes: 3 additions & 2 deletions tool/src/org/antlr/v4/parse/GrammarTreeVisitor.g
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public void discoverRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers
List<GrammarAST> actions,
GrammarAST block) { }
public void finishRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers,
public void discoverLexerRule(RuleAST rule, GrammarAST ID, List<GrammarAST> modifiers, GrammarAST options,
GrammarAST block) { }
public void finishLexerRule(RuleAST rule, GrammarAST ID, GrammarAST block) { }
public void ruleCatch(GrammarAST arg, ActionAST action) { }
Expand Down Expand Up @@ -525,7 +525,8 @@ lexerRule
: ^( RULE TOKEN_REF
{currentRuleName=$TOKEN_REF.text; currentRuleAST=$RULE;}
(^(RULEMODIFIERS m=FRAGMENT {mods.add($m);}))?
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, (GrammarAST)input.LT(1));}
opts=optionsSpec*
{discoverLexerRule((RuleAST)$RULE, $TOKEN_REF, mods, $opts.start, (GrammarAST)input.LT(1));}
lexerRuleBlock
{
finishLexerRule((RuleAST)$RULE, $TOKEN_REF, $lexerRuleBlock.start);
Expand Down
Loading

0 comments on commit a853356

Please sign in to comment.