Skip to content

Commit

Permalink
Throw TrinoException for not supported CSV
Browse files Browse the repository at this point in the history
  • Loading branch information
pajaks authored and wendigo committed May 23, 2024
1 parent 97b8a27 commit df454db
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import io.trino.hive.formats.line.Column;
import io.trino.hive.formats.line.LineDeserializer;
import io.trino.hive.formats.line.LineDeserializerFactory;
import io.trino.spi.TrinoException;

import java.util.List;
import java.util.Map;
Expand All @@ -31,6 +32,7 @@
import static io.trino.hive.formats.line.csv.CsvConstants.SEPARATOR_KEY;
import static io.trino.hive.formats.line.csv.CsvConstants.SERIALIZER_DEFAULT_ESCAPE;
import static io.trino.hive.formats.line.csv.CsvConstants.getCharProperty;
import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;

public class CsvDeserializerFactory
implements LineDeserializerFactory
Expand All @@ -44,18 +46,23 @@ public Set<String> getHiveSerDeClassNames()
@Override
public LineDeserializer create(List<Column> columns, Map<String, String> serdeProperties)
{
char separatorChar = getCharProperty(serdeProperties, SEPARATOR_KEY, DEFAULT_SEPARATOR);
char quoteChar = getCharProperty(serdeProperties, QUOTE_KEY, DEFAULT_QUOTE);
char escapeChar = getCharProperty(serdeProperties, ESCAPE_KEY, DESERIALIZER_DEFAULT_ESCAPE);
// Hive has a bug where when the escape character is explicitly set to double quote (char 34),
// it changes the escape character to backslash (char 92) when deserializing.
if (escapeChar == SERIALIZER_DEFAULT_ESCAPE) {
// Add an explicit checks for separator or quote being backslash, so a more helpful error message can be provided
// as this Hive behavior is not obvious
checkArgument(separatorChar != DESERIALIZER_DEFAULT_ESCAPE, "Separator character cannot be '\\' when escape character is '\"'");
checkArgument(quoteChar != DESERIALIZER_DEFAULT_ESCAPE, "Quote character cannot be '\\' when escape character is '\"'");
escapeChar = DESERIALIZER_DEFAULT_ESCAPE;
try {
char separatorChar = getCharProperty(serdeProperties, SEPARATOR_KEY, DEFAULT_SEPARATOR);
char quoteChar = getCharProperty(serdeProperties, QUOTE_KEY, DEFAULT_QUOTE);
char escapeChar = getCharProperty(serdeProperties, ESCAPE_KEY, DESERIALIZER_DEFAULT_ESCAPE);
// Hive has a bug where when the escape character is explicitly set to double quote (char 34),
// it changes the escape character to backslash (char 92) when deserializing.
if (escapeChar == SERIALIZER_DEFAULT_ESCAPE) {
// Add an explicit checks for separator or quote being backslash, so a more helpful error message can be provided
// as this Hive behavior is not obvious
checkArgument(separatorChar != DESERIALIZER_DEFAULT_ESCAPE, "Separator character cannot be '\\' when escape character is '\"'");
checkArgument(quoteChar != DESERIALIZER_DEFAULT_ESCAPE, "Quote character cannot be '\\' when escape character is '\"'");
escapeChar = DESERIALIZER_DEFAULT_ESCAPE;
}
return new CsvDeserializer(columns, separatorChar, quoteChar, escapeChar);
}
catch (IllegalArgumentException e) {
throw new TrinoException(NOT_SUPPORTED, "CSV not supported", e);
}
return new CsvDeserializer(columns, separatorChar, quoteChar, escapeChar);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import io.trino.hive.formats.line.LineSerializer;
import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import io.trino.spi.TrinoException;
import io.trino.spi.type.RowType;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.Deserializer;
Expand Down Expand Up @@ -269,6 +270,9 @@ private static void assertInvalidConfig(Optional<Character> separatorChar, Optio
.isInstanceOf(SerDeException.class)
.hasMessage("java.lang.UnsupportedOperationException: The separator, quote, and escape characters must be different!");
assertThatThrownBy(() -> new CsvDeserializerFactory().create(createReadColumns(3), createCsvProperties(separatorChar, quoteChar, escapeChar)))
.isInstanceOf(TrinoException.class)
.hasMessageMatching("CSV not supported")
.cause()
.isInstanceOf(IllegalArgumentException.class)
.hasMessageMatching("(Quote|Separator) character cannot be '\\\\' when escape character is '\"'");
}
Expand Down

0 comments on commit df454db

Please sign in to comment.