forked from FasterXML/jackson-dataformats-text
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow escaping of non-printable characters in CSV output/input
This is a proposed solution for FasterXML#124. It introduces a new Feature, `ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR`, which will apply the standard ASCII escapes from JSON to all characters that the CSV generator writes. If this solution is workable, I will add tests.
- Loading branch information
1 parent
62ff5b2
commit a9ce6fa
Showing
4 changed files
with
304 additions
and
12 deletions.
There are no files selected for viewing
101 changes: 101 additions & 0 deletions
101
csv/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvCharacterEscapes.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
package com.fasterxml.jackson.dataformat.csv; | ||
|
||
import com.fasterxml.jackson.core.SerializableString; | ||
import com.fasterxml.jackson.core.io.CharTypes; | ||
import com.fasterxml.jackson.core.io.CharacterEscapes; | ||
import com.fasterxml.jackson.dataformat.csv.CsvGenerator.Feature; | ||
|
||
/** | ||
* Character escapes for CSV. There are multiple types of escapes. | ||
* | ||
* <ul> | ||
* <li>no escapes - return all characters the same way they are defined</li> | ||
* <li>quote escape - return all characters except the quote character which is escaped (backwards compat) </li> | ||
* <li>control escape - same as {@link CharTypes#get7BitOutputEscapes()}, escape all control characters</li> | ||
* <li> control and quote escape - do not double up quote, escape control characters and quote.</li> | ||
* </ul> | ||
*/ | ||
public final class CsvCharacterEscapes extends CharacterEscapes | ||
{ | ||
|
||
private static final long serialVersionUID = 1L; | ||
|
||
// No character escapes, every character returned as is. | ||
private static final CsvCharacterEscapes sNoEscapesInstance = new CsvCharacterEscapes(new int[0]); | ||
|
||
// Only escape quotes, controlled by {@link Feature#ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR}. | ||
private static final CsvCharacterEscapes sQuoteEscapesInstance; | ||
|
||
// Only escape control chars, do *not* escape the quote char. See (@link Feature#ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR}. | ||
private static final CsvCharacterEscapes sControlEscapesInstance; | ||
|
||
// Escape control chars and the quote char. | ||
private static final CsvCharacterEscapes sControlQuoteEscapesInstance = new CsvCharacterEscapes(CharacterEscapes.standardAsciiEscapesForJSON()); | ||
|
||
private static final CsvCharacterEscapes [] sEscapes; | ||
|
||
static { | ||
int[] quoteEscapes = new int[(int) '"' + 1]; | ||
quoteEscapes[(int) '"'] = '"'; | ||
sQuoteEscapesInstance = new CsvCharacterEscapes(quoteEscapes); | ||
|
||
int[] controlEscapes = CharacterEscapes.standardAsciiEscapesForJSON(); | ||
controlEscapes['"'] = 0; // do not escape ", double it up. | ||
sControlEscapesInstance = new CsvCharacterEscapes(controlEscapes); | ||
|
||
sEscapes = new CsvCharacterEscapes[4]; | ||
sEscapes[0] = sNoEscapesInstance; | ||
sEscapes[1] = sQuoteEscapesInstance; | ||
sEscapes[2] = sControlEscapesInstance; | ||
sEscapes[3] = sControlQuoteEscapesInstance; | ||
} | ||
|
||
|
||
private final int[] escapes; | ||
|
||
private CsvCharacterEscapes(int[] escapes) | ||
{ | ||
this.escapes = escapes; | ||
} | ||
|
||
public static CsvCharacterEscapes noEscapesInstance() | ||
{ | ||
return sNoEscapesInstance; | ||
} | ||
|
||
public static CsvCharacterEscapes quoteEscapesInstance() | ||
{ | ||
return sQuoteEscapesInstance; | ||
} | ||
|
||
public static CsvCharacterEscapes controlEscapesInstance() | ||
{ | ||
return sControlEscapesInstance; | ||
} | ||
|
||
public static CsvCharacterEscapes controlQuoteEscapesInstance() | ||
{ | ||
return sControlQuoteEscapesInstance; | ||
} | ||
|
||
public static CsvCharacterEscapes fromCsvFeatures(int csvFeatures) | ||
{ | ||
int idx = 0; | ||
idx |= CsvGenerator.Feature.ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR.enabledIn(csvFeatures) ? 1 : 0; | ||
idx |= Feature.ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR.enabledIn(csvFeatures) ? 2 : 0; | ||
|
||
return sEscapes[idx]; | ||
} | ||
|
||
@Override | ||
public SerializableString getEscapeSequence(int ch) | ||
{ | ||
return null; // unused for CSV escapes | ||
} | ||
|
||
@Override | ||
public int[] getEscapeCodesForAscii() | ||
{ | ||
return escapes; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.