Skip to content

Commit

Permalink
Allow escaping of non-printable characters in CSV output/input
Browse files Browse the repository at this point in the history
This is a proposed solution for FasterXML#124. It introduces a new Feature, `ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR`,
which will apply the standard ASCII escapes from JSON to all characters that the CSV generator writes.

If this solution is workable, I will add tests.
  • Loading branch information
Henning Schmiedehausen authored and pjankovsky committed Jun 25, 2019
1 parent 62ff5b2 commit a9ce6fa
Show file tree
Hide file tree
Showing 4 changed files with 304 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package com.fasterxml.jackson.dataformat.csv;

import com.fasterxml.jackson.core.SerializableString;
import com.fasterxml.jackson.core.io.CharTypes;
import com.fasterxml.jackson.core.io.CharacterEscapes;
import com.fasterxml.jackson.dataformat.csv.CsvGenerator.Feature;

/**
 * {@link CharacterEscapes} implementations used for CSV output. Four fixed
 * variants exist, one per combination of the two escaping features:
 *
 * <ul>
 * <li>no escapes - every character is returned exactly as given</li>
 * <li>quote escape - only the quote character is escaped (backwards compatible)</li>
 * <li>control escape - based on {@link CharacterEscapes#standardAsciiEscapesForJSON()},
 *     but the quote character is not escaped (it is doubled up instead)</li>
 * <li>control and quote escape - control characters and the quote character are
 *     both escaped; the quote is not doubled up</li>
 * </ul>
 *
 * Instances are obtained through the static accessors or via
 * {@link #fromCsvFeatures(int)}; the constructor is private.
 */
public final class CsvCharacterEscapes extends CharacterEscapes
{
    private static final long serialVersionUID = 1L;

    // Bits combined by fromCsvFeatures() to form an index into sEscapes.
    private static final int QUOTE_BIT = 1;
    private static final int CONTROL_BIT = 2;

    // No character escapes, every character returned as is.
    private static final CsvCharacterEscapes sNoEscapesInstance =
            new CsvCharacterEscapes(new int[0]);

    // Only escape quotes, controlled by {@link Feature#ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR}.
    private static final CsvCharacterEscapes sQuoteEscapesInstance =
            new CsvCharacterEscapes(quoteOnlyTable());

    // Only escape control chars, do *not* escape the quote char.
    // See {@link Feature#ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR}.
    private static final CsvCharacterEscapes sControlEscapesInstance =
            new CsvCharacterEscapes(controlOnlyTable());

    // Escape control chars and the quote char.
    private static final CsvCharacterEscapes sControlQuoteEscapesInstance =
            new CsvCharacterEscapes(CharacterEscapes.standardAsciiEscapesForJSON());

    // Lookup table; position = (quote feature ? QUOTE_BIT : 0) | (control feature ? CONTROL_BIT : 0).
    private static final CsvCharacterEscapes[] sEscapes = {
        sNoEscapesInstance,           // 0: neither feature
        sQuoteEscapesInstance,        // 1: quote only
        sControlEscapesInstance,      // 2: control only
        sControlQuoteEscapesInstance  // 3: both
    };

    // Escape table handed out by getEscapeCodesForAscii().
    private final int[] escapes;

    private CsvCharacterEscapes(int[] escapes)
    {
        this.escapes = escapes;
    }

    // Builds a table just long enough to hold an entry for the quote character,
    // and marks only that character.
    private static int[] quoteOnlyTable()
    {
        final int quote = '"';
        final int[] table = new int[quote + 1];
        table[quote] = '"';
        return table;
    }

    // Starts from the standard JSON ASCII escapes and clears the quote entry.
    // NOTE(review): relies on standardAsciiEscapesForJSON() handing back a fresh
    // array on every call, as the control+quote instance uses it too - confirm.
    private static int[] controlOnlyTable()
    {
        final int[] table = CharacterEscapes.standardAsciiEscapesForJSON();
        table['"'] = 0; // do not escape ", double it up.
        return table;
    }

    /**
     * @return the shared instance that escapes nothing
     */
    public static CsvCharacterEscapes noEscapesInstance()
    {
        return sNoEscapesInstance;
    }

    /**
     * @return the shared instance that escapes only the quote character
     */
    public static CsvCharacterEscapes quoteEscapesInstance()
    {
        return sQuoteEscapesInstance;
    }

    /**
     * @return the shared instance that escapes control characters but not the quote character
     */
    public static CsvCharacterEscapes controlEscapesInstance()
    {
        return sControlEscapesInstance;
    }

    /**
     * @return the shared instance that escapes both control characters and the quote character
     */
    public static CsvCharacterEscapes controlQuoteEscapesInstance()
    {
        return sControlQuoteEscapesInstance;
    }

    /**
     * Selects the shared instance matching the escaping features enabled in the
     * given CSV generator feature bit set.
     *
     * @param csvFeatures bit set of {@link Feature} flags
     * @return one of the four shared instances
     */
    public static CsvCharacterEscapes fromCsvFeatures(int csvFeatures)
    {
        int slot = 0;
        if (Feature.ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR.enabledIn(csvFeatures)) {
            slot |= QUOTE_BIT;
        }
        if (Feature.ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR.enabledIn(csvFeatures)) {
            slot |= CONTROL_BIT;
        }
        return sEscapes[slot];
    }

    /**
     * Always returns {@code null}: custom escape sequences are unused for CSV escapes.
     */
    @Override
    public SerializableString getEscapeSequence(int ch)
    {
        return null; // unused for CSV escapes
    }

    /**
     * @return the escape table this instance was created with (not copied)
     */
    @Override
    public int[] getEscapeCodesForAscii()
    {
        return escapes;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,8 @@ protected CsvGenerator _createGenerator(IOContext ctxt, Writer out) throws IOExc
CsvGenerator gen = new CsvGenerator(ctxt, _generatorFeatures, _csvGeneratorFeatures,
_objectCodec, out, _schema);
// any other initializations? No?

gen.setCharacterEscapes(CsvCharacterEscapes.fromCsvFeatures(_csvGeneratorFeatures));
return gen;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import com.fasterxml.jackson.core.*;
import com.fasterxml.jackson.core.base.GeneratorBase;
import com.fasterxml.jackson.core.io.CharacterEscapes;
import com.fasterxml.jackson.core.json.JsonWriteContext;
import com.fasterxml.jackson.core.io.IOContext;
import com.fasterxml.jackson.dataformat.csv.impl.CsvEncoder;
Expand Down Expand Up @@ -81,7 +82,19 @@ public enum Feature
*
* @since 2.9.3
*/
ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR(false)
ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR(false),

/**
* Feature that determines whether control (non-printable) characters are escaped using the
* configured escape character. This feature allows LF and CR characters to be output as {@code \n}
* and {@code \r} instead of being echoed out. This is a compatibility feature for some
* parsers that cannot read such output back in.
* <p>
* Default value is false so that control characters are echoed out (backwards compatible).
*
* @since 2.9.9
*/
ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR(false)
;

protected final boolean _defaultState;
Expand Down Expand Up @@ -146,6 +159,8 @@ private Feature(boolean defaultState) {
// note: can not be final since we may need to re-create it for new schema
protected CsvEncoder _writer;

// Escapes instance last handed to setCharacterEscapes(), returned as-is by
// getCharacterEscapes(); initially null.
protected CharacterEscapes _characterEscapes = null;

/*
/**********************************************************
/* Output state
Expand Down Expand Up @@ -220,6 +235,8 @@ public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
_formatFeatures = csvFeatures;
_schema = schema;
_writer = new CsvEncoder(ctxt, csvFeatures, out, schema);

_writer.setOutputEscapes(CsvCharacterEscapes.fromCsvFeatures(csvFeatures).getEscapeCodesForAscii());
}

public CsvGenerator(IOContext ctxt, int jsonFeatures, int csvFeatures,
Expand Down Expand Up @@ -312,6 +329,22 @@ public JsonGenerator overrideFormatFeatures(int values, int mask)
return this;
}

/**
 * Installs the character escapes to use for output.
 * <p>
 * The given instance is remembered unchanged (see {@link #getCharacterEscapes()}).
 * Its ASCII escape table is passed on to the underlying encoder; when
 * {@code esc} is {@code null}, the table derived from the currently configured
 * CSV format features is used instead.
 *
 * @param esc escapes to apply, or {@code null} to fall back to the feature-derived table
 * @return this generator, to allow call chaining
 */
@Override
public JsonGenerator setCharacterEscapes(CharacterEscapes esc) {
    _characterEscapes = esc;
    // A null argument means "no custom escapes": use what the CSV features dictate.
    final CharacterEscapes effective = (esc != null)
            ? esc
            : CsvCharacterEscapes.fromCsvFeatures(_formatFeatures);
    _writer.setOutputEscapes(effective.getEscapeCodesForAscii());
    return this;
}

/**
 * Returns the escapes instance most recently passed to
 * {@link #setCharacterEscapes(CharacterEscapes)}.
 *
 * @return the stored escapes; may be {@code null}
 */
@Override
public CharacterEscapes getCharacterEscapes() {
    return _characterEscapes;
}


/*
/**********************************************************
/* Public API, capability introspection methods
Expand Down
Loading

0 comments on commit a9ce6fa

Please sign in to comment.