Skip to content

Commit

Permalink
Implement Java-specific ATN data optimization (+-2 shift)
Browse files Browse the repository at this point in the history
  • Loading branch information
KvanTTT committed Feb 6, 2022
1 parent 8333d1a commit 60f92b8
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ protected ATN createATN(Grammar g, boolean useSerializer) {

ATN atn = g.atn;
if ( useSerializer ) {
char[] serialized = ATNSerializer.getSerializedAsChars(atn);
char[] serialized = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
return new ATNDeserializer().deserialize(serialized);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ public void testParseFile() throws IOException, NoSuchFieldException, IllegalAcc
Assert.assertNull(channels);
Assert.assertNull(modes);

char[] atnChars = ATNSerializer.getSerializedAsChars(atn);
char[] atnChars = ATNSerializer.getSerializedAsChars(atn, g.getLanguage());
Assert.assertEquals(ATNDeserializer.SERIALIZED_VERSION, atnChars[0]);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ public ATNDeserializer(ATNDeserializationOptions deserializationOptions) {
}

public ATN deserialize(char[] data) {
data = data.clone();
for (int i = 1; i < data.length; i++) {
data[i] = (char) (data[i] - 2);
}

int p = 0;
int version = toInt(data[p++]);
if (version != SERIALIZED_VERSION) {
Expand Down
31 changes: 16 additions & 15 deletions runtime/Java/src/org/antlr/v4/runtime/atn/ATNSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public ATNSerializer(ATN atn, List<String> tokenNames) {
*
* Convenient to pack into unsigned shorts to make as Java string.
*/
public IntegerList serialize() {
public IntegerList serialize(String language) {
IntegerList data = new IntegerList();
data.add(ATNDeserializer.SERIALIZED_VERSION);

Expand Down Expand Up @@ -354,14 +354,15 @@ public void serializeCodePoint(IntegerList data, int cp) {
}
}

for (int i = 0; i < data.size(); i++) {
boolean isJava = language.equals("Java");
for (int i = 1; i < data.size(); i++) {
int value = data.get(i);
if (value < Character.MIN_VALUE || value > Character.MAX_VALUE) {
throw new UnsupportedOperationException("Serialized ATN data element " +
value + " element " + i + " out of range " + (int) Character.MIN_VALUE + ".." + (int) Character.MAX_VALUE);
}

data.set(i, value);
data.set(i, isJava ? (value + 2) & 0xFFFF : value);
}

return data;
Expand Down Expand Up @@ -404,6 +405,12 @@ private static void serializeSets(
}

public String decode(char[] data) {
data = data.clone();

This comment has been minimized.

Copy link
@parrt

parrt Feb 6, 2022

Member

I'm curious: why the .clone()?

This comment has been minimized.

Copy link
@KvanTTT

KvanTTT Feb 6, 2022

Author Member

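I guess the `.clone()` is there to avoid a side effect in `decode`: without it, the method would modify the caller's array in place (although that would not actually break anything in the existing code). Also, the `clone()` call was already present in the previous version.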

// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}

StringBuilder buf = new StringBuilder();
int p = 0;
int version = ATNDeserializer.toInt(data[p++]);
Expand Down Expand Up @@ -574,22 +581,16 @@ public String getTokenName(int t) {
}

/** Used by Java target to encode short/int array as chars in string. */
public static String getSerializedAsString(ATN atn) {
return new String(getSerializedAsChars(atn));
}

public static IntegerList getSerialized(ATN atn) {
return new ATNSerializer(atn).serialize();
public static String getSerializedAsString(ATN atn, String language) {
return new String(getSerializedAsChars(atn, language));
}

public static char[] getSerializedAsChars(ATN atn) {
return Utils.toCharArray(getSerialized(atn));
public static IntegerList getSerialized(ATN atn, String language) {
return new ATNSerializer(atn).serialize(language);
}

public static String getDecoded(ATN atn, List<String> tokenNames) {
IntegerList serialized = getSerialized(atn);
char[] data = Utils.toCharArray(serialized);
return new ATNSerializer(atn, tokenNames).decode(data);
public static char[] getSerializedAsChars(ATN atn, String language) {
return Utils.toCharArray(getSerialized(atn, language));
}

private void serializeInt(IntegerList data, int value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ public void testSetUp() throws Exception {

protected void checkDeserializationIsStable(Grammar g) {
ATN atn = createATN(g, false);
char[] data = Utils.toCharArray(ATNSerializer.getSerialized(atn));
String atnData = ATNSerializer.getDecoded(atn, Arrays.asList(g.getTokenNames()));
char[] data = Utils.toCharArray(ATNSerializer.getSerialized(atn, "Java"));
String atnData = TestATNSerialization.getDecoded(atn, Arrays.asList(g.getTokenNames()));
ATN atn2 = new ATNDeserializer().deserialize(data);
String atn2Data = ATNSerializer.getDecoded(atn2, Arrays.asList(g.getTokenNames()));
String atn2Data = TestATNSerialization.getDecoded(atn2, Arrays.asList(g.getTokenNames()));

assertEquals(atnData, atn2Data);
}
Expand Down
Loading

0 comments on commit 60f92b8

Please sign in to comment.