Skip to content

Commit

Permalink
Fix coercion gaps for Hive tables in ORC format
Browse files Browse the repository at this point in the history
There were a few differences in the coercions supported between partitioned
and un-partitioned tables for the ORC format.
  • Loading branch information
Praveen2112 committed Jun 11, 2024
1 parent 933e023 commit bd7b258
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@
import io.trino.plugin.hive.coercions.BooleanCoercer.OrcVarcharToBooleanCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.DateToVarcharCoercer;
import io.trino.plugin.hive.coercions.DateCoercer.VarcharToDateCoercer;
import io.trino.plugin.hive.coercions.DoubleToFloatCoercer;
import io.trino.plugin.hive.coercions.FloatToDoubleCoercer;
import io.trino.plugin.hive.coercions.IntegerNumberToDoubleCoercer;
import io.trino.plugin.hive.coercions.IntegerNumberToVarcharCoercer;
import io.trino.plugin.hive.coercions.IntegerNumberUpscaleCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToDateCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.VarcharToLongTimestampCoercer;
Expand Down Expand Up @@ -56,8 +59,14 @@
import static io.trino.orc.metadata.OrcType.OrcTypeKind.STRING;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP;
import static io.trino.orc.metadata.OrcType.OrcTypeKind.VARCHAR;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToDoubleCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToInteger;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToRealCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDecimalToVarcharCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createDoubleToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createIntegerNumberToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DecimalCoercers.createRealToDecimalCoercer;
import static io.trino.plugin.hive.coercions.DoubleToVarcharCoercers.createDoubleToVarcharCoercer;
import static io.trino.plugin.hive.coercions.FloatToVarcharCoercers.createFloatToVarcharCoercer;
import static io.trino.spi.type.BigintType.BIGINT;
Expand All @@ -83,6 +92,9 @@ private OrcTypeTranslator() {}

if (fromOrcTypeKind == BYTE) {
return switch (toTrinoType) {
case SmallintType smallintType -> Optional.of(new IntegerNumberUpscaleCoercer<>(TINYINT, smallintType));
case IntegerType integerType -> Optional.of(new IntegerNumberUpscaleCoercer<>(TINYINT, integerType));
case BigintType bigintType -> Optional.of(new IntegerNumberUpscaleCoercer<>(TINYINT, bigintType));
case DoubleType ignored -> Optional.of(new IntegerNumberToDoubleCoercer<>(TINYINT));
case DecimalType decimalType -> Optional.of(createIntegerNumberToDecimalCoercer(TINYINT, decimalType));
case VarcharType varcharType -> Optional.of(new IntegerNumberToVarcharCoercer<>(TINYINT, varcharType));
Expand All @@ -92,6 +104,8 @@ private OrcTypeTranslator() {}

if (fromOrcTypeKind == SHORT) {
return switch (toTrinoType) {
case IntegerType integerType -> Optional.of(new IntegerNumberUpscaleCoercer<>(SMALLINT, integerType));
case BigintType bigintType -> Optional.of(new IntegerNumberUpscaleCoercer<>(SMALLINT, bigintType));
case DoubleType ignored -> Optional.of(new IntegerNumberToDoubleCoercer<>(SMALLINT));
case DecimalType decimalType -> Optional.of(createIntegerNumberToDecimalCoercer(SMALLINT, decimalType));
case VarcharType varcharType -> Optional.of(new IntegerNumberToVarcharCoercer<>(SMALLINT, varcharType));
Expand All @@ -101,6 +115,7 @@ private OrcTypeTranslator() {}

if (fromOrcTypeKind == INT) {
return switch (toTrinoType) {
case BigintType bigintType -> Optional.of(new IntegerNumberUpscaleCoercer<>(INTEGER, bigintType));
case DoubleType ignored -> Optional.of(new IntegerNumberToDoubleCoercer<>(INTEGER));
case DecimalType decimalType -> Optional.of(createIntegerNumberToDecimalCoercer(INTEGER, decimalType));
case VarcharType varcharType -> Optional.of(new IntegerNumberToVarcharCoercer<>(INTEGER, varcharType));
Expand All @@ -119,13 +134,17 @@ private OrcTypeTranslator() {}

if (fromOrcTypeKind == FLOAT) {
return switch (toTrinoType) {
case DoubleType ignored -> Optional.of(new FloatToDoubleCoercer());
case DecimalType decimalType -> Optional.of(createRealToDecimalCoercer(decimalType));
case VarcharType varcharType -> Optional.of(createFloatToVarcharCoercer(varcharType, true));
default -> Optional.empty();
};
}

if (fromOrcTypeKind == DOUBLE) {
return switch (toTrinoType) {
case RealType ignored -> Optional.of(new DoubleToFloatCoercer());
case DecimalType decimalType -> Optional.of(createDoubleToDecimalCoercer(decimalType));
case VarcharType varcharType -> Optional.of(createDoubleToVarcharCoercer(varcharType, true));
default -> Optional.empty();
};
Expand All @@ -134,6 +153,13 @@ private OrcTypeTranslator() {}
if (fromOrcTypeKind == DECIMAL) {
DecimalType sourceType = DecimalType.createDecimalType(fromOrcType.getPrecision().orElseThrow(), fromOrcType.getScale().orElseThrow());
return switch (toTrinoType) {
case TinyintType tinyintType -> Optional.of(createDecimalToInteger(sourceType, tinyintType));
case SmallintType smallintType -> Optional.of(createDecimalToInteger(sourceType, smallintType));
case IntegerType integerType -> Optional.of(createDecimalToInteger(sourceType, integerType));
case BigintType bigintType -> Optional.of(createDecimalToInteger(sourceType, bigintType));
case RealType ignored -> Optional.of(createDecimalToRealCoercer(sourceType));
case DoubleType ignored -> Optional.of(createDecimalToDoubleCoercer(sourceType));
case DecimalType decimalType -> Optional.of(createDecimalToDecimalCoercer(sourceType, decimalType));
case VarcharType varcharType -> Optional.of(createDecimalToVarcharCoercer(sourceType, varcharType));
default -> Optional.empty();
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,27 +239,6 @@ protected Map<ColumnContext, String> expectedExceptionsWithTrinoContext()
.put(columnContext("orc", "row_to_row"), "Cannot read SQL type 'smallint' from ORC stream '.row_to_row.ti2si' of type BYTE")
.put(columnContext("orc", "list_to_list"), "Cannot read SQL type 'integer' from ORC stream '.list_to_list.item.ti2int' of type BYTE")
.put(columnContext("orc", "map_to_map"), "Cannot read SQL type 'integer' from ORC stream '.map_to_map.key' of type BYTE")
.put(columnContext("orc", "tinyint_to_smallint"), "Cannot read SQL type 'smallint' from ORC stream '.tinyint_to_smallint' of type BYTE")
.put(columnContext("orc", "tinyint_to_int"), "Cannot read SQL type 'integer' from ORC stream '.tinyint_to_int' of type BYTE")
.put(columnContext("orc", "tinyint_to_bigint"), "Cannot read SQL type 'bigint' from ORC stream '.tinyint_to_bigint' of type BYTE")
.put(columnContext("orc", "double_to_float"), "Cannot read SQL type 'real' from ORC stream '.double_to_float' of type DOUBLE")
.put(columnContext("orc", "longdecimal_to_shortdecimal"), "Decimal does not fit long (invalid table schema?)")
.put(columnContext("orc", "float_to_decimal"), "Cannot read SQL type 'decimal(10,5)' from ORC stream '.float_to_decimal' of type FLOAT")
.put(columnContext("orc", "double_to_decimal"), "Cannot read SQL type 'decimal(10,5)' from ORC stream '.double_to_decimal' of type DOUBLE")
.put(columnContext("orc", "decimal_to_float"), "Cannot read SQL type 'real' from ORC stream '.decimal_to_float' of type DECIMAL")
.put(columnContext("orc", "decimal_to_double"), "Cannot read SQL type 'double' from ORC stream '.decimal_to_double' of type DECIMAL")
.put(columnContext("orc", "longdecimal_to_tinyint"), "Cannot read SQL type 'tinyint' from ORC stream '.longdecimal_to_tinyint' of type DECIMAL")
.put(columnContext("orc", "shortdecimal_to_tinyint"), "Cannot read SQL type 'tinyint' from ORC stream '.shortdecimal_to_tinyint' of type DECIMAL")
.put(columnContext("orc", "longdecimal_to_smallint"), "Cannot read SQL type 'smallint' from ORC stream '.longdecimal_to_smallint' of type DECIMAL")
.put(columnContext("orc", "shortdecimal_to_smallint"), "Cannot read SQL type 'smallint' from ORC stream '.shortdecimal_to_smallint' of type DECIMAL")
.put(columnContext("orc", "too_big_shortdecimal_to_smallint"), "Cannot read SQL type 'smallint' from ORC stream '.too_big_shortdecimal_to_smallint' of type DECIMAL")
.put(columnContext("orc", "longdecimal_to_int"), "Cannot read SQL type 'integer' from ORC stream '.longdecimal_to_int' of type DECIMAL")
.put(columnContext("orc", "shortdecimal_to_int"), "Cannot read SQL type 'integer' from ORC stream '.shortdecimal_to_int' of type DECIMAL")
.put(columnContext("orc", "shortdecimal_with_0_scale_to_int"), "Cannot read SQL type 'integer' from ORC stream '.shortdecimal_with_0_scale_to_int' of type DECIMAL")
.put(columnContext("orc", "longdecimal_to_bigint"), "Cannot read SQL type 'bigint' from ORC stream '.longdecimal_to_bigint' of type DECIMAL")
.put(columnContext("orc", "shortdecimal_to_bigint"), "Cannot read SQL type 'bigint' from ORC stream '.shortdecimal_to_bigint' of type DECIMAL")
.put(columnContext("orc", "short_decimal_to_bounded_varchar"), "Cannot read SQL type 'varchar(30)' from ORC stream '.short_decimal_to_bounded_varchar' of type DECIMAL")
.put(columnContext("orc", "long_decimal_to_bounded_varchar"), "Cannot read SQL type 'varchar(30)' from ORC stream '.long_decimal_to_bounded_varchar' of type DECIMAL")
.put(columnContext("orc", "timestamp_row_to_row"), "Cannot read SQL type 'varchar' from ORC stream '.timestamp_row_to_row.timestamp2string' of type TIMESTAMP with attributes {}")
.put(columnContext("orc", "timestamp_list_to_list"), "Cannot read SQL type 'varchar' from ORC stream '.timestamp_row_to_row.timestamp2string' of type TIMESTAMP with attributes {}")
.put(columnContext("orc", "timestamp_map_to_map"), "Cannot read SQL type 'varchar' from ORC stream '.timestamp_row_to_row.timestamp2string' of type TIMESTAMP with attributes {}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1113,9 +1113,9 @@ public void testOrcColumnTypeChange(boolean transactional)
log.info("This shows that Hive see the old data after a column is widened");
assertThat(onHive().executeQuery("SELECT * FROM " + tableName))
.containsOnly(row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA"));
log.info("This shows that Trino gets an exception trying to widen the type");
assertQueryFailure(() -> onTrino().executeQuery("SELECT * FROM " + tableName))
.hasMessageMatching(".*Malformed ORC file. Cannot read SQL type 'integer' from ORC stream '.*.age' of type BYTE with attributes.*");
log.info("This shows that Trino see the old data after a column is widened");
assertThat(onTrino().executeQuery("SELECT * FROM " + tableName))
.containsOnly(row(111, "Katy", 57, "CA"), row(222, "Joe", 72, "WA"));
});
}

Expand Down

0 comments on commit bd7b258

Please sign in to comment.