forked from antlr/antlr4
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow ATN serialization of values more than 65535 (writeCompactUInt32)
Refactor ATN serializer and deserializer to use ATNDataWriter and ATNDataReader. Remove excess data cloning in deserializer. Fixes antlr#1863, fixes antlr#2732, fixes antlr#3338.
- Loading branch information
Showing
8 changed files
with
353 additions
and
412 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
runtime/Java/src/org/antlr/v4/runtime/atn/ATNDataReader.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.antlr.v4.runtime.atn; | ||
|
||
import java.util.UUID; | ||
|
||
public class ATNDataReader { | ||
private final char[] data; | ||
private int p; | ||
|
||
public ATNDataReader(char[] data) { | ||
this.data = data; | ||
} | ||
|
||
public UUID readUUID() { | ||
long leastSigBits = ((long) readUInt32() & 0x00000000FFFFFFFFL) | ((long) readUInt32() << 32); | ||
long mostSigBits = (long) readUInt32() | ((long) readUInt32() << 32); | ||
return new UUID(mostSigBits, leastSigBits); | ||
} | ||
|
||
public int readUInt32() { | ||
return readUInt16() | (readUInt16() << 16); | ||
} | ||
|
||
public int readCompactUInt32() { | ||
int value = readUInt16(); | ||
return value < 0b1000_0000_0000_0000 && value >= 0 | ||
? value | ||
: (readUInt16() << 15) | (value & 0b0111_1111_1111_1111); | ||
} | ||
|
||
public int readUInt16() { | ||
return readUInt16(true); | ||
} | ||
|
||
public int readUInt16(boolean normalize) { | ||
int result = data[p++]; | ||
// Each char value in data is shifted by +2 at the entry to this method. | ||
// This is an encoding optimization targeting the serialized values 0 | ||
// and -1 (serialized to 0xFFFF), each of which are very common in the | ||
// serialized form of the ATN. In the modified UTF-8 that Java uses for | ||
// compiled string literals, these two character values have multi-byte | ||
// forms. By shifting each value by +2, they become characters 2 and 1 | ||
// prior to writing the string, each of which have single-byte | ||
// representations. Since the shift occurs in the tool during ATN | ||
// serialization, each target is responsible for adjusting the values | ||
// during deserialization. | ||
// | ||
// As a special case, note that the first element of data is not | ||
// adjusted because it contains the major version number of the | ||
// serialized ATN, which was fixed at 3 at the time the value shifting | ||
// was implemented. | ||
return normalize ? (result > 1 ? result - ATNDataWriter.OptimizeOffset : result + 65534) : result; | ||
} | ||
} |
53 changes: 53 additions & 0 deletions
53
runtime/Java/src/org/antlr/v4/runtime/atn/ATNDataWriter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.antlr.v4.runtime.atn; | ||
|
||
import org.antlr.v4.runtime.misc.IntegerList; | ||
|
||
import java.util.UUID; | ||
|
||
public class ATNDataWriter { | ||
public static final int OptimizeOffset = 2; | ||
|
||
private final IntegerList data; | ||
|
||
public ATNDataWriter(IntegerList data) { | ||
this.data = data; | ||
} | ||
|
||
public void writeUUID(UUID uuid) { | ||
long leastSignificantBits = uuid.getLeastSignificantBits(); | ||
writeUInt32((int)leastSignificantBits); | ||
writeUInt32((int)(leastSignificantBits >> 32)); | ||
long mostSignificantBits = uuid.getMostSignificantBits(); | ||
writeUInt32((int)mostSignificantBits); | ||
writeUInt32((int)(mostSignificantBits >> 32)); | ||
} | ||
|
||
public void writeUInt32(int value) { | ||
writeUInt16((char)value); | ||
writeUInt16((char)(value >> 16)); | ||
} | ||
|
||
public void writeCompactUInt32(int value) { | ||
if (value < 0b1000_0000_0000_0000) { | ||
writeUInt16(value); | ||
} else { | ||
writeUInt16((value & 0b0111_1111_1111_1111) | (1 << 15)); | ||
writeUInt16(value >>> 15); | ||
} | ||
} | ||
|
||
public void writeUInt16(int value) { | ||
writeUInt16(value, true); | ||
} | ||
|
||
public void writeUInt16(int value, boolean optimize) { | ||
if (value < Character.MIN_VALUE || value > Character.MAX_VALUE) { | ||
throw new UnsupportedOperationException("Serialized ATN data element "+ | ||
data.size() + " element " + value + " out of range "+ | ||
(int)Character.MIN_VALUE + ".." + (int)Character.MAX_VALUE); | ||
} | ||
// Note: This value shifting loop is documented in ATNDeserializer. | ||
// don't adjust the first value since that's the version number | ||
data.add(optimize ? (value + OptimizeOffset) & 0xFFFF : value); | ||
} | ||
} |
Oops, something went wrong.