Skip to content

Commit

Permalink
Simplify Iceberg bucket transform definition
Browse files Browse the repository at this point in the history
The `Hasher` is the only variable part. Extract method for constructing
the right `Hasher` and have common code path for the rest.
  • Loading branch information
findepi committed Nov 4, 2021
1 parent 9e7e15b commit f709aae
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 73 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -166,49 +166,49 @@ private static ColumnTransform identity(Type type)
return new ColumnTransform(type, Function.identity());
}

private static ColumnTransform bucket(Type type, int count)
@VisibleForTesting
static ColumnTransform bucket(Type type, int count)
{
Hasher hasher = getBucketingHash(type);
return new ColumnTransform(
INTEGER,
getBucketTransform(type, count));
block -> bucketBlock(block, count, hasher));
}

public static Function<Block, Block> getBucketTransform(Type type, int count)
private static Hasher getBucketingHash(Type type)
{
if (type.equals(INTEGER)) {
return block -> bucketInteger(block, count);
return PartitionTransforms::hashInteger;
}
if (type.equals(BIGINT)) {
return block -> bucketBigint(block, count);
return PartitionTransforms::hashBigint;
}
if (isShortDecimal(type)) {
DecimalType decimal = (DecimalType) type;
return block -> bucketShortDecimal(decimal, block, count);
return hashShortDecimal((DecimalType) type);
}
if (isLongDecimal(type)) {
DecimalType decimal = (DecimalType) type;
return block -> bucketLongDecimal(decimal, block, count);
return hashLongDecimal((DecimalType) type);
}
if (type.equals(DATE)) {
return block -> bucketDate(block, count);
return PartitionTransforms::hashDate;
}
if (type.equals(TIME_MICROS)) {
return block -> bucketTime(block, count);
return PartitionTransforms::hashTime;
}
if (type.equals(TIMESTAMP_MICROS)) {
return block -> bucketTimestamp(block, count);
return PartitionTransforms::hashTimestamp;
}
if (type.equals(TIMESTAMP_TZ_MICROS)) {
return block -> bucketTimestampWithTimeZone(block, count);
return PartitionTransforms::hashTimestampWithTimeZone;
}
if (type instanceof VarcharType) {
return block -> bucketVarchar(block, count);
return PartitionTransforms::hashVarchar;
}
if (type.equals(VARBINARY)) {
return block -> bucketVarbinary(block, count);
return PartitionTransforms::hashVarbinary;
}
if (type.equals(UUID)) {
return block -> bucketUuid(block, count);
return PartitionTransforms::hashUuid;
}
throw new UnsupportedOperationException("Unsupported type for 'bucket': " + type);
}
Expand Down Expand Up @@ -304,31 +304,16 @@ private static Block extractTimestampWithTimeZone(Block block, ToLongFunction<Lo
return builder.build();
}

// Buckets an INTEGER block: hash each position, reduce modulo count.
private static Block bucketInteger(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashInteger;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one INTEGER value.
private static int hashInteger(Block block, int position)
{
    long value = INTEGER.getLong(block, position);
    return bucketHash(value);
}

// Buckets a BIGINT block: hash each position, reduce modulo count.
private static Block bucketBigint(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashBigint;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one BIGINT value.
private static int hashBigint(Block block, int position)
{
    long value = BIGINT.getLong(block, position);
    return bucketHash(value);
}

// Buckets a short-decimal block; the hasher is built per decimal type.
private static Block bucketShortDecimal(DecimalType decimal, Block block, int count)
{
    Hasher hasher = hashShortDecimal(decimal);
    return bucketBlock(block, count, hasher);
}

private static Hasher hashShortDecimal(DecimalType decimal)
{
return (block, position) -> {
Expand All @@ -338,11 +323,6 @@ private static Hasher hashShortDecimal(DecimalType decimal)
};
}

// Buckets a long-decimal block; the hasher is built per decimal type.
private static Block bucketLongDecimal(DecimalType decimal, Block block, int count)
{
    Hasher hasher = hashLongDecimal(decimal);
    return bucketBlock(block, count, hasher);
}

private static Hasher hashLongDecimal(DecimalType decimal)
{
return (block, position) -> {
Expand All @@ -352,72 +332,37 @@ private static Hasher hashLongDecimal(DecimalType decimal)
};
}

// Buckets a DATE block: hash each position, reduce modulo count.
private static Block bucketDate(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashDate;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one DATE value.
private static int hashDate(Block block, int position)
{
    long value = DATE.getLong(block, position);
    return bucketHash(value);
}

// Buckets a TIME(6) block: hash each position, reduce modulo count.
private static Block bucketTime(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashTime;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one TIME(6) value; the stored picoseconds are
// scaled down to microseconds before hashing.
private static int hashTime(Block block, int position)
{
    long micros = TIME_MICROS.getLong(block, position) / PICOSECONDS_PER_MICROSECOND;
    return bucketHash(micros);
}

// Buckets a TIMESTAMP(6) block: hash each position, reduce modulo count.
private static Block bucketTimestamp(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashTimestamp;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one TIMESTAMP(6) value.
private static int hashTimestamp(Block block, int position)
{
    long value = TIMESTAMP_MICROS.getLong(block, position);
    return bucketHash(value);
}

// Buckets a TIMESTAMP(6) WITH TIME ZONE block: hash each position, reduce modulo count.
private static Block bucketTimestampWithTimeZone(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashTimestampWithTimeZone;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one TIMESTAMP(6) WITH TIME ZONE value,
// hashed on its microsecond instant.
private static int hashTimestampWithTimeZone(Block block, int position)
{
    long micros = timestampTzToMicros(getTimestampTz(block, position));
    return bucketHash(micros);
}

// Buckets a VARCHAR block: hash each position, reduce modulo count.
private static Block bucketVarchar(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashVarchar;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one VARCHAR value.
private static int hashVarchar(Block block, int position)
{
    var value = VARCHAR.getSlice(block, position);
    return bucketHash(value);
}

// Buckets a VARBINARY block: hash each position, reduce modulo count.
private static Block bucketVarbinary(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashVarbinary;
    return bucketBlock(block, count, hasher);
}

// Bucket hash for one VARBINARY value.
private static int hashVarbinary(Block block, int position)
{
    var value = VARBINARY.getSlice(block, position);
    return bucketHash(value);
}

// Buckets a UUID block: hash each position, reduce modulo count.
private static Block bucketUuid(Block block, int count)
{
    Hasher hasher = PartitionTransforms::hashUuid;
    return bucketBlock(block, count, hasher);
}

private static int hashUuid(Block block, int position)
{
return bucketHash(UUID.getSlice(block, position));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@
import static com.google.common.base.Verify.verify;
import static io.airlift.slice.Slices.utf8Slice;
import static io.airlift.slice.Slices.wrappedBuffer;
import static io.trino.plugin.iceberg.PartitionTransforms.getBucketTransform;
import static io.trino.plugin.iceberg.TypeConverter.toTrinoType;
import static io.trino.spi.type.BigintType.BIGINT;
import static io.trino.spi.type.DateType.DATE;
Expand Down Expand Up @@ -277,7 +276,7 @@ private Integer computeIcebergBucket(Type type, Object icebergValue, int bucketC
private Integer computeTrinoBucket(Type icebergType, Object icebergValue, int bucketCount)
{
io.trino.spi.type.Type trinoType = toTrinoType(icebergType, TYPE_MANAGER);
Function<Block, Block> bucketTransform = getBucketTransform(trinoType, bucketCount);
Function<Block, Block> bucketTransform = PartitionTransforms.bucket(trinoType, bucketCount).getTransform();

BlockBuilder blockBuilder = trinoType.createBlockBuilder(null, 1);

Expand Down

0 comments on commit f709aae

Please sign in to comment.