Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Treat precision as NANOSECONDS for timestamp to be coerced #18003

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -71,6 +71,7 @@
import static io.trino.plugin.hive.HivePageSourceProvider.ColumnMapping.toColumnHandles;
import static io.trino.plugin.hive.HivePageSourceProvider.ColumnMappingKind.PREFILLED;
import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision;
import static io.trino.plugin.hive.coercions.CoercionUtils.createTypeFromCoercer;
import static io.trino.plugin.hive.util.HiveBucketing.HiveBucketFilter;
import static io.trino.plugin.hive.util.HiveBucketing.getHiveBucketFilter;
import static io.trino.plugin.hive.util.HiveUtil.getPrefilledColumnValue;
@@ -564,14 +565,14 @@ public static List<HiveColumnHandle> toColumnHandles(List<ColumnMapping> regular
projectedColumn.getDereferenceIndices(),
projectedColumn.getDereferenceNames(),
fromHiveType,
fromHiveType.getType(typeManager, timestampPrecision));
createTypeFromCoercer(typeManager, fromHiveType, columnHandle.getHiveType(), timestampPrecision));
});

return new HiveColumnHandle(
columnHandle.getBaseColumnName(),
columnHandle.getBaseHiveColumnIndex(),
fromHiveTypeBase,
fromHiveTypeBase.getType(typeManager, timestampPrecision),
createTypeFromCoercer(typeManager, fromHiveTypeBase, columnHandle.getBaseHiveType(), timestampPrecision),
newColumnProjectionInfo,
columnHandle.getColumnType(),
columnHandle.getComment());
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
import io.trino.spi.type.CharType;

import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.plugin.hive.HivePageSource.narrowerThan;
import static io.trino.plugin.hive.coercions.CoercionUtils.narrowerThan;
import static io.trino.spi.type.Chars.truncateToLengthAndTrimSpaces;

public class CharCoercer

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -24,8 +24,6 @@
import io.trino.spi.type.RealType;
import io.trino.spi.type.VarcharType;

import java.util.function.Function;

import static io.trino.spi.StandardErrorCode.INVALID_ARGUMENTS;
import static io.trino.spi.type.DecimalConversions.doubleToLongDecimal;
import static io.trino.spi.type.DecimalConversions.doubleToShortDecimal;
@@ -49,7 +47,7 @@ public final class DecimalCoercers
{
private DecimalCoercers() {}

public static Function<Block, Block> createDecimalToDecimalCoercer(DecimalType fromType, DecimalType toType)
public static TypeCoercer<DecimalType, DecimalType> createDecimalToDecimalCoercer(DecimalType fromType, DecimalType toType)
{
if (fromType.isShort()) {
if (toType.isShort()) {
@@ -148,7 +146,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

public static Function<Block, Block> createDecimalToDoubleCoercer(DecimalType fromType)
public static TypeCoercer<DecimalType, DoubleType> createDecimalToDoubleCoercer(DecimalType fromType)
{
if (fromType.isShort()) {
return new ShortDecimalToDoubleCoercer(fromType);
@@ -191,7 +189,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

public static Function<Block, Block> createDecimalToRealCoercer(DecimalType fromType)
public static TypeCoercer<DecimalType, RealType> createDecimalToRealCoercer(DecimalType fromType)
{
if (fromType.isShort()) {
return new ShortDecimalToRealCoercer(fromType);
@@ -234,7 +232,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

public static Function<Block, Block> createDecimalToVarcharCoercer(DecimalType fromType, VarcharType toType)
public static TypeCoercer<DecimalType, VarcharType> createDecimalToVarcharCoercer(DecimalType fromType, VarcharType toType)
{
if (fromType.isShort()) {
return new ShortDecimalToVarcharCoercer(fromType, toType);
@@ -288,7 +286,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

public static Function<Block, Block> createDoubleToDecimalCoercer(DecimalType toType)
public static TypeCoercer<DoubleType, DecimalType> createDoubleToDecimalCoercer(DecimalType toType)
{
if (toType.isShort()) {
return new DoubleToShortDecimalCoercer(toType);
@@ -328,7 +326,7 @@ protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int pos
}
}

public static Function<Block, Block> createRealToDecimalCoercer(DecimalType toType)
public static TypeCoercer<RealType, DecimalType> createRealToDecimalCoercer(DecimalType toType)
{
if (toType.isShort()) {
return new RealToShortDecimalCoercer(toType);
Original file line number Diff line number Diff line change
@@ -48,29 +48,6 @@ public final class TimestampCoercer

private TimestampCoercer() {}

public static class ShortTimestampToVarcharCoercer
Copy link
Contributor

@vlad-lyutenko vlad-lyutenko Jun 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am a little bit missed, why it's safe to remove it here?
It's because of this change in HivePageSource:

if (fromType instanceof TimestampType timestampType && toType instanceof VarcharType varcharType) {
            // --deleted-- if (timestampType.isShort()) {
            //  --deleted--  return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType));
            // }
            return Optional.of(new LongTimestampToVarcharCoercer(timestampType, varcharType));
        }

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously we were respecting the timestamp precision - so we need a coercer which would could have timestamp read as long or it could be as LongTimestamp - but this change would ensure that we would be reading the timestamp column as a NANOSECOND - so we don't need ShortTimestampToVarcharCoercer

extends TypeCoercer<TimestampType, VarcharType>
{
public ShortTimestampToVarcharCoercer(TimestampType fromType, VarcharType toType)
{
super(fromType, toType);
}

@Override
protected void applyCoercedValue(BlockBuilder blockBuilder, Block block, int position)
{
long epochMicros = fromType.getLong(block, position);
long epochSecond = floorDiv(epochMicros, MICROSECONDS_PER_SECOND);
int nanoFraction = floorMod(epochMicros, MICROSECONDS_PER_SECOND) * NANOSECONDS_PER_MICROSECOND;
toType.writeSlice(
blockBuilder,
truncateToLength(
Slices.utf8Slice(
LOCAL_DATE_TIME.format(LocalDateTime.ofEpochSecond(epochSecond, nanoFraction, UTC))),
toType));
}
}

public static class LongTimestampToVarcharCoercer
extends TypeCoercer<TimestampType, VarcharType>
{
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
import io.trino.spi.type.VarcharType;

import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.plugin.hive.HivePageSource.narrowerThan;
import static io.trino.plugin.hive.coercions.CoercionUtils.narrowerThan;
import static io.trino.spi.type.Varchars.truncateToLength;

public class VarcharCoercer
Original file line number Diff line number Diff line change
@@ -90,7 +90,6 @@
import static io.trino.plugin.hive.HiveSessionProperties.getOrcMaxReadBlockSize;
import static io.trino.plugin.hive.HiveSessionProperties.getOrcStreamBufferSize;
import static io.trino.plugin.hive.HiveSessionProperties.getOrcTinyStripeThreshold;
import static io.trino.plugin.hive.HiveSessionProperties.getTimestampPrecision;
import static io.trino.plugin.hive.HiveSessionProperties.isOrcBloomFiltersEnabled;
import static io.trino.plugin.hive.HiveSessionProperties.isOrcNestedLazy;
import static io.trino.plugin.hive.HiveSessionProperties.isUseOrcColumnNames;
@@ -366,7 +365,7 @@ else if (column.getBaseHiveColumnIndex() < fileColumns.size()) {
Type readType = column.getType();
if (orcColumn != null) {
int sourceIndex = fileReadColumns.size();
Optional<TypeCoercer<?, ?>> coercer = createCoercer(orcColumn.getColumnType(), readType, getTimestampPrecision(session));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a question: We are no longer relay on a session?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously we rely on the session to determine the timestamp precision so that we could read the timestamp column with that precision and now we would read them as NANOSECOND irrespective of the precision configured.

Optional<TypeCoercer<?, ?>> coercer = createCoercer(orcColumn.getColumnType(), readType);
if (coercer.isPresent()) {
fileReadTypes.add(coercer.get().getFromType());
columnAdaptations.add(ColumnAdaptation.coercedColumn(sourceIndex, coercer.get()));
Original file line number Diff line number Diff line change
@@ -14,31 +14,24 @@
package io.trino.plugin.hive.orc;

import io.trino.orc.metadata.OrcType.OrcTypeKind;
import io.trino.plugin.hive.HiveTimestampPrecision;
import io.trino.plugin.hive.coercions.TimestampCoercer.LongTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TimestampCoercer.ShortTimestampToVarcharCoercer;
import io.trino.plugin.hive.coercions.TypeCoercer;
import io.trino.spi.type.TimestampType;
import io.trino.spi.type.Type;
import io.trino.spi.type.VarcharType;

import java.util.Optional;

import static io.trino.orc.metadata.OrcType.OrcTypeKind.TIMESTAMP;
import static io.trino.spi.type.TimestampType.createTimestampType;
import static io.trino.spi.type.TimestampType.TIMESTAMP_NANOS;

public final class OrcTypeTranslator
{
private OrcTypeTranslator() {}

public static Optional<TypeCoercer<? extends Type, ? extends Type>> createCoercer(OrcTypeKind fromOrcType, Type toTrinoType, HiveTimestampPrecision timestampPrecision)
public static Optional<TypeCoercer<? extends Type, ? extends Type>> createCoercer(OrcTypeKind fromOrcType, Type toTrinoType)
{
if (fromOrcType == TIMESTAMP && toTrinoType instanceof VarcharType varcharType) {
TimestampType timestampType = createTimestampType(timestampPrecision.getPrecision());
if (timestampType.isShort()) {
return Optional.of(new ShortTimestampToVarcharCoercer(timestampType, varcharType));
}
return Optional.of(new LongTimestampToVarcharCoercer(timestampType, varcharType));
return Optional.of(new LongTimestampToVarcharCoercer(TIMESTAMP_NANOS, varcharType));
}
return Optional.empty();
}
Loading