Skip to content

Commit

Permalink
Support of full int range in serializer and deserializer (up to Integer.MAX_VALUE)
Browse files Browse the repository at this point in the history

fix antlr#840, fix antlr#1863, fix antlr#2732, fix antlr#3338
  • Loading branch information
KvanTTT committed Feb 20, 2022
1 parent 88f9bc7 commit a8378d3
Show file tree
Hide file tree
Showing 9 changed files with 290 additions and 235 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -366,9 +366,10 @@ public static RuntimeTestDescriptor[] getRuntimeTestDescriptors(String group, St
}

if (group.equals("LexerExec")) {
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfTest(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfTest(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorLfDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getLineSeparatorCrLfDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getLargeLexerDescriptor(targetName));
descriptors.add(GeneratedLexerDescriptors.getAtnStatesSizeMoreThan65535Descriptor(targetName));
}

return descriptors.toArray(new RuntimeTestDescriptor[0]);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.antlr.v4.test.runtime;

import java.util.Collections;

public class GeneratedLexerDescriptors {
static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
static RuntimeTestDescriptor getLineSeparatorLfDescriptor(String targetName) {
UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
result.name = "LineSeparatorLf";
result.targetName = targetName;
Expand All @@ -20,7 +22,7 @@ static RuntimeTestDescriptor getLineSeparatorLfTest(String targetName) {
return result;
}

static RuntimeTestDescriptor getLineSeparatorCrLfTest(String targetName) {
static RuntimeTestDescriptor getLineSeparatorCrLfDescriptor(String targetName) {
UniversalRuntimeTestDescriptor result = new UniversalRuntimeTestDescriptor();
result.name = "LineSeparatorCrLf";
result.targetName = targetName;
Expand Down Expand Up @@ -65,4 +67,50 @@ static RuntimeTestDescriptor getLargeLexerDescriptor(String targetName) {
"[@1,5:4='<EOF>',<-1>,1:5]\n";
return result;
}

/**
 * Builds the "AtnStatesSizeMoreThan65535" lexer test descriptor.
 *
 * <p>The grammar is generated: 1024 literal token rules named {@code T_<i>} followed by a
 * 70-underscore suffix, plus a whitespace-skipping rule. The input lists every literal on its
 * own line, and the expected output is the corresponding token dump terminated by an EOF token.
 * Judging by the descriptor name and note, the grammar is meant to be large enough to exceed
 * 65535 ATN states (regression for antlr4 issue 1863).
 *
 * @param targetName name of the runtime target stored in the descriptor
 * @return the populated descriptor
 */
static RuntimeTestDescriptor getAtnStatesSizeMoreThan65535Descriptor(String targetName) {
    final int tokensCount = 1024;
    final String suffix = String.join("", Collections.nCopies(70, "_"));
    final String grammarName = "L";

    StringBuilder grammarText = new StringBuilder();
    grammarText.append("lexer grammar ").append(grammarName).append(";\n").append('\n');
    StringBuilder inputText = new StringBuilder();
    StringBuilder expectedTokens = new StringBuilder();

    // Character offset at which the next token begins in the generated input.
    int nextStart = 0;
    for (int index = 0; index < tokensCount; index++) {
        String literal = "T_" + index + suffix;
        // Token types and line numbers are both 1-based and follow rule order.
        int ordinal = index + 1;
        int first = nextStart;
        int last = first + literal.length() - 1;

        grammarText.append(literal).append(": '").append(literal).append("';\n");
        inputText.append(literal).append('\n');

        expectedTokens.append("[@").append(index)
            .append(',').append(first).append(':').append(last)
            .append("='").append(literal)
            .append("',<").append(ordinal)
            .append(">,").append(ordinal)
            .append(":0]\n");

        // Skip the last character of the literal and the newline that follows it.
        nextStart = last + 2;
    }

    grammarText.append("\n");
    grammarText.append("WS: [ \\t\\r\\n]+ -> skip;\n");

    // EOF is an empty token: its stop offset is one less than its start offset.
    expectedTokens.append("[@").append(tokensCount)
        .append(',').append(nextStart).append(':').append(nextStart - 1)
        .append("='<EOF>',<-1>,").append(tokensCount + 1).append(":0]\n");

    UniversalRuntimeTestDescriptor descriptor = new UniversalRuntimeTestDescriptor();
    descriptor.name = "AtnStatesSizeMoreThan65535";
    descriptor.notes = "Regression for https://github.com/antlr/antlr4/issues/1863";
    descriptor.targetName = targetName;
    descriptor.testType = "Lexer";
    descriptor.grammar = grammarText.toString();
    descriptor.grammarName = grammarName;
    descriptor.input = inputText.toString();
    descriptor.output = expectedTokens.toString();
    return descriptor;
}
}
16 changes: 15 additions & 1 deletion runtime/Java/src/org/antlr/v4/runtime/atn/ATNDataReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,21 @@ public ATNDataReader(char[] data) {
this.data = data;
}

public int readUInt32() {
/**
 * Reads one integer stored in the compact variable-length format.
 *
 * <p>The first 16-bit value decides the layout:
 * <ul>
 *   <li>{@code 0xFFFF} decodes to {@code -1};</li>
 *   <li>tag bits {@code 00} (the two bits at {@link ATNDataWriter#MaskBits}): the value itself;</li>
 *   <li>tag bits {@code 01}: the low {@code MaskBits} bits come from the first value and the
 *       remaining bits from the next 16-bit value;</li>
 *   <li>any other tag: the value follows as a full 32-bit integer.</li>
 * </ul>
 *
 * @return the decoded integer
 */
public int read() {
    final int first = readUInt16();
    if (first == 0xFFFF) {
        return -1;
    }

    final int tag = (first >> ATNDataWriter.MaskBits) & 0b11;
    if (tag == 0) {
        return first;
    }
    if (tag == 0b01) {
        final int lowBits = first & ((1 << ATNDataWriter.MaskBits) - 1);
        final int highBits = readUInt16() << ATNDataWriter.MaskBits;
        return highBits | lowBits;
    }
    return readInt32();
}

/**
 * Reads a 32-bit integer stored as two consecutive 16-bit values, low half first.
 *
 * @return the combined 32-bit value
 */
public int readInt32() {
    final int lowHalf = readUInt16();
    final int highHalf = readUInt16();
    return (highHalf << 16) | lowHalf;
}

Expand Down
36 changes: 33 additions & 3 deletions runtime/Java/src/org/antlr/v4/runtime/atn/ATNDataWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,49 @@
import org.antlr.v4.runtime.misc.IntegerList;

public class ATNDataWriter {
public static final int MaskBits = 14;
public static final int JavaOptimizeOffset = 2;

private final IntegerList data;
private final String language;
private final boolean isJava;

public ATNDataWriter(IntegerList data, String language) {
this.data = data;
this.language = language;
this.isJava = language.equals("Java");
}

public void writeUInt32(int value) {
/**
 * Writes an int from the full range [Integer.MIN_VALUE..Integer.MAX_VALUE] in a compact format.
 *
 * <pre>
 * | encoding                                                    | count | type         |
 * | ----------------------------------------------------------- | ----- | ------------ |
 * | 00xx xxxx xxxx xxxx                                         | 1     | int (14 bit) |
 * | 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx                     | 2     | int (30 bit) |
 * | 1000 0000 0000 0000 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx | 3     | int (32 bit) |
 * | 1111 1111 1111 1111                                         | 1     | -1 (0xFFFF)  |
 * </pre>
 *
 * <p>Negative values other than -1, and values of 2^30 or more, use the three-unit form.
 *
 * @param value the integer to encode
 * @return the number of 16-bit units written (1, 2 or 3)
 */
public int write(int value) {
    if (value == -1) {
        writeUInt16(0xFFFF);
        return 1;
    }

    final int payloadMask = (1 << MaskBits) - 1;

    // Short form: the value fits below the two tag bits.
    if (value >= 0 && value <= payloadMask) {
        writeUInt16(value);
        return 1;
    }

    // Medium form: low MaskBits bits share a unit with the 01 tag, the rest fill a second unit.
    if (value >= 0 && value < (1 << (MaskBits + 16))) {
        writeUInt16((value & payloadMask) | (0b01 << MaskBits));
        writeUInt16(value >>> MaskBits);
        return 2;
    }

    // Long form: a 10-tagged marker unit followed by the raw 32-bit value.
    writeUInt16(0b10 << MaskBits);
    writeInt32(value);
    return 3;
}

/**
 * Writes a 32-bit integer as two 16-bit units, low half first.
 *
 * @param value the integer to write
 */
public void writeInt32(int value) {
    // Narrowing to char keeps exactly the low 16 bits of each operand.
    final char lowHalf = (char) value;
    final char highHalf = (char) (value >> 16);
    writeUInt16(lowHalf);
    writeUInt16(highHalf);
}
Expand Down
112 changes: 43 additions & 69 deletions runtime/Java/src/org/antlr/v4/runtime/atn/ATNDeserializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,36 +45,33 @@ public ATN deserialize(char[] data) {
throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
}

ATNType grammarType = ATNType.values()[reader.readUInt16()];
int maxTokenType = reader.readUInt16();
ATNType grammarType = ATNType.values()[reader.read()];
int maxTokenType = reader.read();
ATN atn = new ATN(grammarType, maxTokenType);

//
// STATES
//
List<Pair<LoopEndState, Integer>> loopBackStateNumbers = new ArrayList<>();
List<Pair<BlockStartState, Integer>> endStateNumbers = new ArrayList<>();
int nstates = reader.readUInt16();
int nstates = reader.read();
for (int i=0; i<nstates; i++) {
int stype = reader.readUInt16();
int stype = reader.read();
// ignore bad type of states
if ( stype==ATNState.INVALID_TYPE ) {
atn.addState(null);
continue;
}

int ruleIndex = reader.readUInt16();
if (ruleIndex == Character.MAX_VALUE) {
ruleIndex = -1;
}
int ruleIndex = reader.read();

ATNState s = stateFactory(stype, ruleIndex);
if ( stype == ATNState.LOOP_END ) { // special case
int loopBackStateNumber = reader.readUInt16();
int loopBackStateNumber = reader.read();
loopBackStateNumbers.add(new Pair<>((LoopEndState) s, loopBackStateNumber));
}
else if (s instanceof BlockStartState) {
int endStateNumber = reader.readUInt16();
int endStateNumber = reader.read();
endStateNumbers.add(new Pair<>((BlockStartState) s, endStateNumber));
}
atn.addState(s);
Expand All @@ -89,37 +86,33 @@ else if (s instanceof BlockStartState) {
pair.a.endState = (BlockEndState)atn.states.get(pair.b);
}

int numNonGreedyStates = reader.readUInt16();
int numNonGreedyStates = reader.read();
for (int i = 0; i < numNonGreedyStates; i++) {
int stateNumber = reader.readUInt16();
int stateNumber = reader.read();
((DecisionState)atn.states.get(stateNumber)).nonGreedy = true;
}

int numPrecedenceStates = reader.readUInt16();
int numPrecedenceStates = reader.read();
for (int i = 0; i < numPrecedenceStates; i++) {
int stateNumber = reader.readUInt16();
int stateNumber = reader.read();
((RuleStartState)atn.states.get(stateNumber)).isLeftRecursiveRule = true;
}

//
// RULES
//
int nrules = reader.readUInt16();
int nrules = reader.read();
if ( atn.grammarType == ATNType.LEXER ) {
atn.ruleToTokenType = new int[nrules];
}

atn.ruleToStartState = new RuleStartState[nrules];
for (int i=0; i<nrules; i++) {
int s = reader.readUInt16();
int s = reader.read();
RuleStartState startState = (RuleStartState)atn.states.get(s);
atn.ruleToStartState[i] = startState;
if ( atn.grammarType == ATNType.LEXER ) {
int tokenType = reader.readUInt16();
if (tokenType == 0xFFFF) {
tokenType = Token.EOF;
}

int tokenType = reader.read();
atn.ruleToTokenType[i] = tokenType;
}
}
Expand All @@ -138,34 +131,30 @@ else if (s instanceof BlockStartState) {
//
// MODES
//
int nmodes = reader.readUInt16();
int nmodes = reader.read();
for (int i=0; i < nmodes; i++) {
int s = reader.readUInt16();
int s = reader.read();
atn.modeToStartState.add((TokensStartState)atn.states.get(s));
}

//
// SETS
//
List<IntervalSet> sets = new ArrayList<>();

// First, read all sets with 16-bit Unicode code points <= U+FFFF.
deserializeSets(reader, sets, ATNSerializer.UnicodeSerializeMode.UNICODE_BMP);

// Next, deserialize sets with 32-bit arguments <= U+10FFFF.
deserializeSets(reader, sets, ATNSerializer.UnicodeSerializeMode.UNICODE_SMP);
// Read all sets with 16-bit or 32-bit Unicode code points
IntervalSet[] sets = deserializeSets(reader);

//
// EDGES
//
int nedges = reader.readUInt16();
int nedges = reader.read();
for (int i=0; i<nedges; i++) {
int src = reader.readUInt16();
int trg = reader.readUInt16();
int ttype = reader.readUInt16();
int arg1 = reader.readUInt16();
int arg2 = reader.readUInt16();
int arg3 = reader.readUInt16();
int src = reader.read();
int trg = reader.read();
int ttype = reader.read();
int arg1 = reader.read();
int arg2 = reader.read();
int arg3 = reader.read();
Transition trans = edgeFactory(atn, ttype, src, trg, arg1, arg2, arg3, sets);
ATNState srcState = atn.states.get(src);
srcState.addTransition(trans);
Expand Down Expand Up @@ -227,9 +216,9 @@ else if (state instanceof StarLoopbackState) {
//
// DECISIONS
//
int ndecisions = reader.readUInt16();
int ndecisions = reader.read();
for (int i=1; i<=ndecisions; i++) {
int s = reader.readUInt16();
int s = reader.read();
DecisionState decState = (DecisionState)atn.states.get(s);
atn.decisionToState.add(decState);
decState.decision = i-1;
Expand All @@ -239,21 +228,10 @@ else if (state instanceof StarLoopbackState) {
// LEXER ACTIONS
//
if (atn.grammarType == ATNType.LEXER) {
atn.lexerActions = new LexerAction[reader.readUInt16()];
atn.lexerActions = new LexerAction[reader.read()];
for (int i = 0; i < atn.lexerActions.length; i++) {
LexerActionType actionType = LexerActionType.values()[reader.readUInt16()];
int data1 = reader.readUInt16();
if (data1 == 0xFFFF) {
data1 = -1;
}

int data2 = reader.readUInt16();
if (data2 == 0xFFFF) {
data2 = -1;
}

LexerAction lexerAction = lexerActionFactory(actionType, data1, data2);

LexerActionType actionType = LexerActionType.values()[reader.read()];
LexerAction lexerAction = lexerActionFactory(actionType, reader.read(), reader.read());
atn.lexerActions[i] = lexerAction;
}
}
Expand Down Expand Up @@ -357,30 +335,26 @@ else if (state instanceof StarLoopbackState) {
return atn;
}

private void deserializeSets(ATNDataReader reader, List<IntervalSet> sets, ATNSerializer.UnicodeSerializeMode mode) {
int nsets = reader.readUInt16();
private IntervalSet[] deserializeSets(ATNDataReader reader) {
int nsets = reader.read();
IntervalSet[] sets = new IntervalSet[nsets];
for (int i=0; i<nsets; i++) {
int nintervals = reader.readUInt16();
int nintervals = reader.read();
IntervalSet set = new IntervalSet();
sets.add(set);
sets[i] = set;

boolean containsEof = reader.readUInt16() != 0;
boolean containsEof = reader.read() != 0;
if (containsEof) {
set.add(-1);
}

for (int j=0; j<nintervals; j++) {
int a, b;
if (mode == ATNSerializer.UnicodeSerializeMode.UNICODE_BMP) {
a = reader.readUInt16();
b = reader.readUInt16();
} else {
a = reader.readUInt32();
b = reader.readUInt32();
}
for (int j = 0; j < nintervals; j++) {
int a = reader.read();
int b = reader.read();
set.add(a, b);
}
}
return sets;
}

/**
Expand Down Expand Up @@ -486,7 +460,7 @@ protected void checkCondition(boolean condition, String message) {
protected Transition edgeFactory(ATN atn,
int type, int src, int trg,
int arg1, int arg2, int arg3,
List<IntervalSet> sets)
IntervalSet[] sets)
{
ATNState target = atn.states.get(trg);
switch (type) {
Expand All @@ -513,8 +487,8 @@ protected Transition edgeFactory(ATN atn,
}
case Transition.ACTION :
return new ActionTransition(target, arg1, arg2, arg3 != 0);
case Transition.SET : return new SetTransition(target, sets.get(arg1));
case Transition.NOT_SET : return new NotSetTransition(target, sets.get(arg1));
case Transition.SET : return new SetTransition(target, sets[arg1]);
case Transition.NOT_SET : return new NotSetTransition(target, sets[arg1]);
case Transition.WILDCARD : return new WildcardTransition(target);
}

Expand Down
Loading

0 comments on commit a8378d3

Please sign in to comment.