Skip to content

Commit

Permalink
Adding a boolean to drive byte tracking opt-in behavior (#14)
Browse files Browse the repository at this point in the history
Adding a boolean to drive byte tracking opt-in behavior
  • Loading branch information
DarrenJAN authored Dec 3, 2024
1 parent 344f282 commit 27511be
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 8 deletions.
15 changes: 11 additions & 4 deletions src/main/java/org/apache/commons/csv/CSVParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ public static class Builder extends AbstractStreamBuilder<CSVParser, Builder> {
private CSVFormat format;
private long characterOffset;
private long recordNumber = 1;
private boolean enableByteTracking = false;

/**
* Constructs a new instance.
Expand All @@ -164,7 +165,7 @@ protected Builder() {
@SuppressWarnings("resource")
@Override
public CSVParser get() throws IOException {
return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset());
return new CSVParser(getReader(), format != null ? format : CSVFormat.DEFAULT, characterOffset, recordNumber, getCharset(), enableByteTracking);
}

/**
Expand Down Expand Up @@ -200,6 +201,11 @@ public Builder setRecordNumber(final long recordNumber) {
return asThis();
}

/**
 * Sets whether byte tracking is enabled for the parser this builder creates.
 * <p>
 * The flag is passed by {@link #get()} to the {@code CSVParser} constructor and from there to
 * {@code ExtendedBufferedReader}, which creates a charset encoder only when a charset is set and this flag is
 * {@code true}. The field defaults to {@code false}, so byte tracking is opt-in.
 * </p>
 *
 * @param enableByteTracking {@code true} to enable byte tracking, {@code false} to leave it disabled.
 * @return the value of {@code asThis()}, allowing calls to be chained.
 */
public Builder setEnableByteTracking(final boolean enableByteTracking) {
this.enableByteTracking = enableByteTracking;
return asThis();
}

}

final class CSVRecordIterator implements Iterator<CSVRecord> {
Expand Down Expand Up @@ -507,7 +513,7 @@ public CSVParser(final Reader reader, final CSVFormat format) throws IOException
@SuppressWarnings("resource")
public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
throws IOException {
this(reader, format, characterOffset, recordNumber, null);
this(reader, format, characterOffset, recordNumber, null, false);
}

/**
Expand Down Expand Up @@ -535,12 +541,13 @@ public CSVParser(final Reader reader, final CSVFormat format, final long charact
* @throws CSVException Thrown on invalid input.
* @since 1.13.0.
*/
private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber, final Charset charset)
private CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber,
final Charset charset, final boolean enableByteTracking)
throws IOException {
Objects.requireNonNull(reader, "reader");
Objects.requireNonNull(format, "format");
this.format = format.copy();
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset));
this.lexer = new Lexer(format, new ExtendedBufferedReader(reader, charset, enableByteTracking));
this.csvRecordIterator = new CSVRecordIterator();
this.headers = createHeaders();
this.characterOffset = characterOffset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ final class ExtendedBufferedReader extends UnsynchronizedBufferedReader {
super(reader);
}

ExtendedBufferedReader(final Reader reader, Charset charset) {
ExtendedBufferedReader(final Reader reader, Charset charset, boolean enableByteTracking) {
super(reader);
if (charset != null) {
if (charset != null && enableByteTracking) {
encoder = charset.newEncoder();
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/org/apache/commons/csv/CSVParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ public void testGetRecordThreeBytesRead() throws Exception {
.setDelimiter(',')
.setQuote('\'')
.get();
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get() ) {
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get() ) {
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);

assertEquals(0, parser.getRecordNumber());
Expand Down Expand Up @@ -748,7 +748,7 @@ public void testGetRecordFourBytesRead() throws Exception {
.setDelimiter(',')
.setQuote('\'')
.get();
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).get()) {
try (CSVParser parser = CSVParser.builder().setReader(new StringReader(code)).setFormat(format).setCharset(UTF_8).setEnableByteTracking(true).get()) {
CSVRecord record = new CSVRecord(parser, null, null, 1L, 0L, 0L);

assertEquals(0, parser.getRecordNumber());
Expand Down
2 changes: 2 additions & 0 deletions src/test/java/org/apache/commons/csv/JiraCsv196Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ public void parseThreeBytes() throws IOException {
.setFormat(format)
.setReader(getTestInput("org/apache/commons/csv/CSV-196/japanese.csv"))
.setCharset(StandardCharsets.UTF_8)
.setEnableByteTracking(true)
.get();
long[] charByteKey = {0, 89, 242, 395};
int idx = 0;
Expand All @@ -57,6 +58,7 @@ public void parseFourBytes() throws IOException {
.setFormat(format)
.setReader(getTestInput("org/apache/commons/csv/CSV-196/emoji.csv"))
.setCharset(StandardCharsets.UTF_8)
.setEnableByteTracking(true)
.get();
long[] charByteKey = {0, 84, 701, 1318, 1935};
int idx = 0;
Expand Down

0 comments on commit 27511be

Please sign in to comment.