From 36c52a537a06b812b8855c6d5cf176dd58c14540 Mon Sep 17 00:00:00 2001 From: NebelNidas <48808497+NebelNidas@users.noreply.github.com> Date: Wed, 17 Apr 2024 13:50:36 +0200 Subject: [PATCH] Overhaul `ColumnFileReader` (#82) * Overhaul `ColumnFileReader` * Return null also when freshly encountering EOL as first char Achieved in part by not always seeking to the start of the next column * Fix issue with start offset * Add changes to changelog --- CHANGELOG.md | 1 + .../mappingio/format/ColumnFileReader.java | 346 +++++++++++------- .../format/enigma/EnigmaFileReader.java | 2 +- .../mappingio/format/jobf/JobfFileReader.java | 2 +- .../format/simple/RecafSimpleFileReader.java | 2 +- .../mappingio/format/srg/JamFileReader.java | 2 +- .../mappingio/format/srg/SrgFileReader.java | 2 +- .../mappingio/format/srg/TsrgFileReader.java | 74 ++-- .../format/tiny/Tiny1FileReader.java | 4 +- .../format/tiny/Tiny2FileReader.java | 8 +- 10 files changed, 268 insertions(+), 175 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8fc05537..02e41f10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +- Overhauled the internal `ColumnFileReader` to behave more consistently and future-proof ## [0.6.1] - 2024-04-15 - Fixed CSRG and JAM writers sometimes skipping elements whose parents have incomplete destination names diff --git a/src/main/java/net/fabricmc/mappingio/format/ColumnFileReader.java b/src/main/java/net/fabricmc/mappingio/format/ColumnFileReader.java index 8b3b996a..4d26aa0b 100644 --- a/src/main/java/net/fabricmc/mappingio/format/ColumnFileReader.java +++ b/src/main/java/net/fabricmc/mappingio/format/ColumnFileReader.java @@ -31,8 +31,9 @@ */ @ApiStatus.Internal public final class ColumnFileReader implements Closeable { - public ColumnFileReader(Reader reader, char columnSeparator) { + public ColumnFileReader(Reader reader, char indentationChar, char columnSeparator) { this.reader = reader; + this.indentationChar = indentationChar; this.columnSeparator = columnSeparator; } @@ -46,44 +47,17 @@ public void close() throws IOException { * *
The reader will point to the next column or end of line if successful, otherwise remains unchanged.
*
- * @param expect Content to expect.
- * @return {@code true} if the column was read and had the expected content, false otherwise.
+ * @param expected Content to expect.
+ * @return {@code true} if the column was read and had the expected content, {@code false} otherwise.
*/
- public boolean nextCol(String expect) throws IOException {
- if (eol) return false;
-
- int len = expect.length();
- if (!fillBuffer(len)) return false;
-
- for (int i = 0; i < len; i++) {
- if (buffer[bufferPos + i] != expect.charAt(i)) return false; // read failed, not all of expect available
- }
-
- char trailing = 0;
-
- if (fillBuffer(len + 1) // not eof
- && (trailing = buffer[bufferPos + len]) != columnSeparator // not end of column
- && trailing != '\n' // not end of line
- && trailing != '\r') {
- return false; // read failed, column contains data beyond expect
- }
-
- // successful read
-
- bufferPos += expect.length();
-
- // seek to the start of the next column
- if (trailing == columnSeparator) {
- bufferPos++;
- } else {
- eol = true;
- }
-
- return true;
+ public boolean nextCol(String expected) throws IOException {
+ return read(false, false, true, expected) != noMatch;
}
/**
* Read and consume a column without unescaping.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
*/
@Nullable
public String nextCol() throws IOException {
@@ -91,36 +65,80 @@ public String nextCol() throws IOException {
}
/**
- * Read and consume a column with unescaping.
+ * Read and consume a column, and unescape it if requested.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
*/
@Nullable
- public String nextEscapedCol() throws IOException {
- return nextCol(true);
+ public String nextCol(boolean unescape) throws IOException {
+ return read(unescape, true, true, null);
}
/**
- * Read and consume a column and unescape it if requested.
+ * Read a column without consuming, and unescape if requested.
+ * Since it doesn't consume, it won't (un)mark BOF, EOL or EOF.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
*/
@Nullable
- public String nextCol(boolean unescape) throws IOException {
- if (eol) return null;
+ public String peekCol(boolean unescape) throws IOException {
+ return read(unescape, false, true, null);
+ }
+
+ /**
+ * @param unescape Whether to unescape the read string.
+ * @param consume Whether to advance the bufferPos.
+ * @param stopAtNextCol Whether to only read one column.
+ * @param expected If not {@code null}, the read string must match this exactly, otherwise we early-exit with {@link #noMatch}. Always consumes if matched.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
+ * If {@code expected} is not {@code null}, it will be returned if matched, otherwise {@link #noMatch}.
+ */
+ @Nullable
+ private String read(boolean unescape, boolean consume, boolean stopAtNextCol, @Nullable String expected) throws IOException {
+ if (eol) return expected == null ? null : noMatch;
+
+ int expectedLength = expected != null ? expected.length() : -1;
+
+ // Check if the buffer needs to be filled and if we hit EOF while doing so
+ if (expectedLength > 0 && bufferPos + expectedLength >= bufferLimit) {
+ if (!fillBuffer(expectedLength, !consume, false)) return noMatch;
+ }
int start;
- int end = bufferPos;
+ int end = this.bufferPos;
int firstEscaped = -1;
+ int contentCharsRead = 0;
+ int modifiedBufferPos = -1;
+ int startOffset = 0;
+ boolean readAnything = false;
+ boolean filled = true;
readLoop: for (;;) {
while (end < bufferLimit) {
char c = buffer[end];
+ boolean isColumnSeparator = (c == columnSeparator);
+
+ // skip leading column separator
+ if (isColumnSeparator && !readAnything) {
+ startOffset = 1;
+ contentCharsRead = -1;
+ }
+
+ readAnything = true;
+
+ if (expected != null && contentCharsRead > -1) {
+ if ((contentCharsRead < expectedLength && c != expected.charAt(contentCharsRead))
+ || contentCharsRead > expectedLength) {
+ return noMatch;
+ }
+ }
- if (c == columnSeparator || c == '\n' || c == '\r') { // end of the current column
+ if (c == '\n' || c == '\r' || (isColumnSeparator && stopAtNextCol && contentCharsRead > -1)) { // stop reading
start = bufferPos;
- bufferPos = end;
+ modifiedBufferPos = end;
- // seek to the start of the next column
- if (c == columnSeparator) {
- bufferPos++;
- } else {
+ if (!isColumnSeparator && (consume || expected != null)) {
eol = true;
}
@@ -129,13 +147,14 @@ public String nextCol(boolean unescape) throws IOException {
firstEscaped = bufferPos;
}
+ contentCharsRead++;
end++;
}
// buffer ran out, refill
int oldStart = bufferPos;
- boolean filled = fillBuffer(end - bufferPos + 1);
+ filled = fillBuffer(end - bufferPos + 1, !consume, consume);
int posShift = bufferPos - oldStart; // fillBuffer may compact the data, shifting it to the buffer start
assert posShift <= 0;
end += posShift;
@@ -143,74 +162,70 @@ public String nextCol(boolean unescape) throws IOException {
if (!filled) {
start = bufferPos;
- bufferPos = end;
- eol = true;
break;
}
}
- int len = end - start;
+ start += startOffset;
+ String ret;
- if (len == 0) {
- return "";
- } else if (firstEscaped >= 0) {
- return Tiny2Util.unescape(String.valueOf(buffer, start, len));
+ if (expected != null) {
+ consume = true;
+ ret = expected;
} else {
- return String.valueOf(buffer, start, len);
+ int len = end - start;
+
+ if (len == 0) {
+ ret = readAnything ? "" : null;
+ } else if (firstEscaped >= 0) {
+ ret = Tiny2Util.unescape(String.valueOf(buffer, start, len));
+ } else {
+ ret = String.valueOf(buffer, start, len);
+ }
}
- }
-
- /**
- * Read and consume all column until eol and unescape if requested.
- */
- @Nullable
- public String nextCols(boolean unescape) throws IOException {
- if (eol) return null;
- int end = bufferPos;
- int firstEscaped = -1;
- boolean filled;
+ if (consume) {
+ if (readAnything) bof = false;
+ if (!filled) eof = eol = true;
+ if (modifiedBufferPos != -1) bufferPos = modifiedBufferPos;
- readLoop: do {
- while (end < bufferLimit) {
- char c = buffer[end];
+ if (eol && !eof) { // manually check for EOF
+ int charsToRead = buffer[bufferPos] == '\r' ? 2 : 1; // 2 for \r\n, 1 for just \n
- if (c == '\n' || c == '\r') { // end of the current column
- break readLoop;
- } else if (unescape && c == '\\' && firstEscaped < 0) {
- firstEscaped = bufferPos;
+ if (end >= bufferLimit - charsToRead) {
+ fillBuffer(charsToRead, false, true);
}
-
- end++;
}
+ }
- // buffer ran out, refill
-
- int oldStart = bufferPos;
- filled = fillBuffer(end - bufferPos + 1);
- int posShift = bufferPos - oldStart; // fillBuffer may compact the data, shifting it to the buffer start
- assert posShift <= 0;
- end += posShift;
- if (firstEscaped >= 0) firstEscaped += posShift;
- } while (filled);
-
- int start = bufferPos;
- bufferPos = end;
- eol = true;
+ return ret;
+ }
- int len = end - start;
+ /**
+ * Read and consume all columns until EOL, and unescape if requested.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
+ */
+ @Nullable
+ public String nextCols(boolean unescape) throws IOException {
+ return read(unescape, true, false, null);
+ }
- if (len == 0) {
- return "";
- } else if (firstEscaped >= 0) {
- return Tiny2Util.unescape(String.valueOf(buffer, start, len));
- } else {
- return String.valueOf(buffer, start, len);
- }
+ /**
+ * Read all columns until EOL without consuming, and unescape if requested.
+ * Since it doesn't consume, it won't (un)mark BOF, EOL or EOF.
+ *
+ * @return {@code null} if nothing has been read (first char was EOL), otherwise the read string (may be empty).
+ */
+ @Nullable
+ public String peekCols(boolean unescape) throws IOException {
+ return read(unescape, false, false, null);
}
/**
* Read and consume a column and convert it to integer.
+ *
+ * @return -1 if nothing has been read (first char was EOL), otherwise the number present.
*/
public int nextIntCol() throws IOException {
String str = nextCol(false);
@@ -223,87 +238,167 @@ public int nextIntCol() throws IOException {
}
public boolean nextLine(int indent) throws IOException {
- fillLopo: do {
+ fillLoop: do {
while (bufferPos < bufferLimit) {
char c = buffer[bufferPos];
if (c == '\n') {
if (indent == 0) { // skip empty lines if indent is 0
- if (!fillBuffer(2)) break fillLopo;
+ if (!fillBuffer(2, false, true)) break fillLoop;
c = buffer[bufferPos + 1];
if (c == '\n' || c == '\r') { // 2+ consecutive new lines, consume first nl and retry
bufferPos++;
lineNumber++;
+ bof = false;
continue;
}
}
- if (!fillBuffer(indent + 1)) return false;
+ if (!fillBuffer(indent + 1, false, true)) return false;
for (int i = 1; i <= indent; i++) {
- if (buffer[bufferPos + i] != '\t') return false;
+ if (buffer[bufferPos + i] != indentationChar) return false;
}
bufferPos += indent + 1;
lineNumber++;
+ bof = false;
eol = false;
return true;
}
bufferPos++;
+ bof = false;
}
- } while (fillBuffer(1));
+ } while (fillBuffer(1, false, true));
return false;
}
public boolean hasExtraIndents() throws IOException {
- return fillBuffer(1) && buffer[bufferPos] == '\t';
+ return fillBuffer(1, false, false) && buffer[bufferPos] == indentationChar;
}
public int getLineNumber() {
return lineNumber;
}
+ /**
+ * Whether or not EOL has been encountered in the current line yet.
+ */
+ public boolean isAtEol() {
+ return eol;
+ }
+
+ public boolean isAtBof() {
+ return bof;
+ }
+
public boolean isAtEof() {
return eof;
}
- public void mark() {
- if (bufferPos > 0) {
+ /**
+ * Marks the present position in the stream. Subsequent calls to
+ * {@link #reset()} will reposition the stream to this point.
+ * In comparison to {@link java.io.Reader#mark(int)} this method stacks,
+ * so don't forget to call {@link #discardMark()} if you don't need the mark anymore.
+ *
+ * @return the mark index (starting at 1)
+ */
+ public int mark() {
+ if (markIdx == 0 && bufferPos > 0) { // save memory
int available = bufferLimit - bufferPos;
System.arraycopy(buffer, bufferPos, buffer, 0, available);
bufferPos = 0;
bufferLimit = available;
- markedLineNumber = lineNumber;
- markedEol = eol;
- markedEof = eof;
}
- mark = bufferPos;
+ if (markIdx == markedBufferPositions.length) {
+ markedBufferPositions = Arrays.copyOf(markedBufferPositions, markedBufferPositions.length * 2);
+ markedLineNumbers = Arrays.copyOf(markedLineNumbers, markedLineNumbers.length * 2);
+ markedBofs = Arrays.copyOf(markedBofs, markedBofs.length * 2);
+ markedEols = Arrays.copyOf(markedEols, markedEols.length * 2);
+ markedEofs = Arrays.copyOf(markedEofs, markedEofs.length * 2);
+ }
+
+ markedBufferPositions[markIdx] = bufferPos;
+ markedLineNumbers[markIdx] = lineNumber;
+ markedBofs[markIdx] = bof;
+ markedEols[markIdx] = eol;
+ markedEofs[markIdx] = eof;
+
+ return ++markIdx;
}
- public void reset() {
- if (mark < 0) throw new IllegalStateException("not marked");
+ /**
+ * Discard the last mark.
+ */
+ public void discardMark() {
+ discardMark(markIdx);
+ }
+
+ /**
+ * Discard the mark at specified index and all above, if present.
+ */
+ private void discardMark(int index) {
+ if (markIdx == 0) throw new IllegalStateException("no mark to discard");
+ if (index < 1 || index > markIdx) throw new IllegalStateException("index out of bounds");
+
+ for (int i = markIdx; i >= index; i--) {
+ markedBufferPositions[i-1] = 0;
+ markedLineNumbers[i-1] = 0;
+ }
+
+ markIdx = index - 1;
+ }
+
+ /**
+ * Reset to last mark. The marked data isn't discarded, so can be called multiple times.
+ * If you want to reset to an older mark, use {@link #reset(int)}.
+ *
+ * @return The index of the mark that was reset to.
+ */
+ public int reset() {
+ reset(markIdx);
+ return markIdx;
+ }
+
+ /**
+ * Reset to the mark with the specified index.
+ * Unless reset to 0, the marked data isn't discarded afterwards,
+ * so can be called multiple times.
+ * Use negative indices to reset to a mark relative to the current one.
+ */
+ public void reset(int indexToResetTo) {
+ if (markIdx == 0) throw new IllegalStateException("no mark to reset to");
+ if (indexToResetTo < -markIdx || indexToResetTo > markIdx) throw new IllegalStateException("index out of bounds");
+
+ if (indexToResetTo < 0) indexToResetTo += markIdx;
+ int arrayIdx = indexToResetTo == 0 ? indexToResetTo : indexToResetTo - 1;
+
+ bufferPos = markedBufferPositions[arrayIdx];
+ lineNumber = markedLineNumbers[arrayIdx];
+ bof = markedBofs[arrayIdx];
+ eol = markedEols[arrayIdx];
+ eof = markedEofs[arrayIdx];
- bufferPos = mark;
- lineNumber = markedLineNumber;
- eol = markedEol;
- eof = markedEof;
+ if (indexToResetTo == 0) discardMark(1);
+ markIdx = indexToResetTo;
}
- private boolean fillBuffer(int count) throws IOException {
+ private boolean fillBuffer(int count, boolean preventCompaction, boolean markEof) throws IOException {
int available = bufferLimit - bufferPos;
int req = count - available;
if (req <= 0) return true;
if (bufferPos + count > buffer.length) { // not enough remaining buffer space
- if (mark >= 0) { // marked for rewind -> grow
+ if (markIdx > 0 || preventCompaction) { // can't compact -> grow
buffer = Arrays.copyOf(buffer, Math.max(bufferPos + count, buffer.length * 2));
- } else { // not marked, compact and grow as needed
+ } else { // compact and grow as needed
if (count > buffer.length) { // too small for compacting to suffice -> grow and compact
char[] newBuffer = new char[Math.max(count, buffer.length * 2)];
System.arraycopy(buffer, bufferPos, newBuffer, 0, available);
@@ -323,7 +418,7 @@ private boolean fillBuffer(int count) throws IOException {
int read = reader.read(buffer, bufferLimit, buffer.length - bufferLimit);
if (read < 0) {
- eof = eol = true;
+ if (markEof) eof = eol = true;
return false;
}
@@ -333,16 +428,21 @@ private boolean fillBuffer(int count) throws IOException {
return true;
}
+ private static final String noMatch = new String();
private final Reader reader;
+ private final char indentationChar;
private final char columnSeparator;
private char[] buffer = new char[4096 * 4];
private int bufferPos;
private int bufferLimit;
- private int mark = -1;
private int lineNumber = 1;
+ private boolean bof = true;
private boolean eol; // tracks whether the last column has been read, otherwise ambiguous if the last col is empty
private boolean eof;
- private int markedLineNumber;
- private boolean markedEol;
- private boolean markedEof;
+ private int markIdx = 0; // 0 means no mark
+ private int[] markedBufferPositions = new int[3];
+ private int[] markedLineNumbers = new int[3];
+ private boolean[] markedBofs = new boolean[3];
+ private boolean[] markedEols = new boolean[3];
+ private boolean[] markedEofs = new boolean[3];
}
diff --git a/src/main/java/net/fabricmc/mappingio/format/enigma/EnigmaFileReader.java b/src/main/java/net/fabricmc/mappingio/format/enigma/EnigmaFileReader.java
index 3590ccde..c44e64a1 100644
--- a/src/main/java/net/fabricmc/mappingio/format/enigma/EnigmaFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/enigma/EnigmaFileReader.java
@@ -45,7 +45,7 @@ public static void read(Reader reader, MappingVisitor visitor) throws IOExceptio
}
public static void read(Reader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
- read(new ColumnFileReader(reader, ' '), sourceNs, targetNs, visitor);
+ read(new ColumnFileReader(reader, '\t', ' '), sourceNs, targetNs, visitor);
}
public static void read(ColumnFileReader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
diff --git a/src/main/java/net/fabricmc/mappingio/format/jobf/JobfFileReader.java b/src/main/java/net/fabricmc/mappingio/format/jobf/JobfFileReader.java
index 76b4a842..7700db8b 100644
--- a/src/main/java/net/fabricmc/mappingio/format/jobf/JobfFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/jobf/JobfFileReader.java
@@ -39,7 +39,7 @@ public static void read(Reader reader, MappingVisitor visitor) throws IOExceptio
}
public static void read(Reader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
- read(new ColumnFileReader(reader, ' '), sourceNs, targetNs, visitor);
+ read(new ColumnFileReader(reader, '\t', ' '), sourceNs, targetNs, visitor);
}
private static void read(ColumnFileReader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
diff --git a/src/main/java/net/fabricmc/mappingio/format/simple/RecafSimpleFileReader.java b/src/main/java/net/fabricmc/mappingio/format/simple/RecafSimpleFileReader.java
index 4d4c3c59..8b4c9358 100644
--- a/src/main/java/net/fabricmc/mappingio/format/simple/RecafSimpleFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/simple/RecafSimpleFileReader.java
@@ -42,7 +42,7 @@ public static void read(Reader reader, MappingVisitor visitor) throws IOExceptio
}
public static void read(Reader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
- read(new ColumnFileReader(reader, ' '), sourceNs, targetNs, visitor);
+ read(new ColumnFileReader(reader, '\t', ' '), sourceNs, targetNs, visitor);
}
private static void read(ColumnFileReader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
diff --git a/src/main/java/net/fabricmc/mappingio/format/srg/JamFileReader.java b/src/main/java/net/fabricmc/mappingio/format/srg/JamFileReader.java
index 2f9b57a4..8ad1eb47 100644
--- a/src/main/java/net/fabricmc/mappingio/format/srg/JamFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/srg/JamFileReader.java
@@ -45,7 +45,7 @@ public static void read(Reader reader, MappingVisitor visitor) throws IOExceptio
}
public static void read(Reader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
- read(new ColumnFileReader(reader, ' '), sourceNs, targetNs, visitor);
+ read(new ColumnFileReader(reader, '\t', ' '), sourceNs, targetNs, visitor);
}
private static void read(ColumnFileReader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
diff --git a/src/main/java/net/fabricmc/mappingio/format/srg/SrgFileReader.java b/src/main/java/net/fabricmc/mappingio/format/srg/SrgFileReader.java
index 912fc6a7..77b0360c 100644
--- a/src/main/java/net/fabricmc/mappingio/format/srg/SrgFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/srg/SrgFileReader.java
@@ -46,7 +46,7 @@ public static void read(Reader reader, MappingVisitor visitor) throws IOExceptio
}
public static void read(Reader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
- read(new ColumnFileReader(reader, ' '), sourceNs, targetNs, visitor);
+ read(new ColumnFileReader(reader, '\t', ' '), sourceNs, targetNs, visitor);
}
private static void read(ColumnFileReader reader, String sourceNs, String targetNs, MappingVisitor visitor) throws IOException {
diff --git a/src/main/java/net/fabricmc/mappingio/format/srg/TsrgFileReader.java b/src/main/java/net/fabricmc/mappingio/format/srg/TsrgFileReader.java
index dec67e7a..c50ba709 100644
--- a/src/main/java/net/fabricmc/mappingio/format/srg/TsrgFileReader.java
+++ b/src/main/java/net/fabricmc/mappingio/format/srg/TsrgFileReader.java
@@ -16,7 +16,6 @@
package net.fabricmc.mappingio.format.srg;
-import java.io.CharArrayReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
@@ -44,7 +43,7 @@ private TsrgFileReader() {
}
public static List