Skip to content

Commit

Permalink
Fix deprecated OriginalType in trino-hive trinodb#1802
Browse files Browse the repository at this point in the history
- Replace OriginalType with LogicalTypeAnnotation whenever possible
- Refactor test list & map wrappers for consistency
  • Loading branch information
nevillelyh committed Aug 8, 2022
1 parent 9601c74 commit 599bc28
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
Expand All @@ -34,7 +34,6 @@
import java.util.Locale;

import static com.google.common.base.Preconditions.checkState;
import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE;

/**
* This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter
Expand Down Expand Up @@ -69,7 +68,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet
{
if (typeInfo.getCategory() == Category.PRIMITIVE) {
if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType())
.named(name);
}
else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
Expand Down Expand Up @@ -100,21 +99,21 @@ else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
}
else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.CHAR_TYPE_NAME)) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.VARCHAR_TYPE_NAME)) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
else if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
int prec = decimalTypeInfo.precision();
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
}
else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name);
}
else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
Expand Down Expand Up @@ -145,7 +144,7 @@ else if (typeInfo.getCategory() == Category.UNION) {
// NOTE(review): this is a rendered diff hunk without +/- markers; the line that passes
// OriginalType.LIST appears to be the removed version, immediately followed by its replacement.
/**
 * Builds the Parquet group for a Hive list type.
 * The list's element type is converted under the field name "array_element", placed inside a
 * REPEATED group named ParquetHiveSerDe.ARRAY.toString(), and handed to listWrapper.
 *
 * @param name name given to the resulting list group
 * @param typeInfo Hive list type; its element type is read via getListElementTypeInfo()
 * @return the group produced by listWrapper
 */
private static GroupType convertArrayType(String name, ListTypeInfo typeInfo)
{
TypeInfo subType = typeInfo.getListElementTypeInfo();
return listWrapper(name, OriginalType.LIST, new GroupType(Repetition.REPEATED,
return listWrapper(name, new GroupType(Repetition.REPEATED,
ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
}

Expand All @@ -172,41 +171,42 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA
{
//support projection only on key of a map
if (valueType == null) {
return listWrapper(
repetition,
return mapKvWrapper(
alias,
MAP_KEY_VALUE,
new GroupType(
Repetition.REPEATED,
mapAlias,
keyType));
keyType),
repetition);
}
else {
if (!valueType.getName().equals("value")) {
throw new RuntimeException(valueType.getName() + " should be value");
}
return listWrapper(
repetition,
return mapKvWrapper(
alias,
MAP_KEY_VALUE,
new GroupType(
Repetition.REPEATED,
mapAlias,
keyType,
valueType));
valueType),
repetition);
}
}

// NOTE(review): rendered diff hunk without +/- markers. The first signature (listWrapper with an
// OriginalType parameter), the check on "nested" and the "new GroupType(...)" return appear to be
// the removed lines; the Javadoc below describes the mapKvWrapper lines that replace them.
/**
 * Wraps a repeated type in a group annotated with MapKeyValueTypeAnnotation.
 *
 * @param name name of the resulting group
 * @param elementType field added to the group; must have REPEATED repetition
 * @param repetition repetition of the resulting group
 * @return the annotated group with elementType added as a field
 * @throws IllegalArgumentException if elementType is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
private static GroupType mapKvWrapper(String name, Type elementType, Repetition repetition)
{
if (!nested.isRepetition(Repetition.REPEATED)) {
throw new IllegalArgumentException("Nested type should be repeated: " + nested);
if (!elementType.isRepetition(Repetition.REPEATED)) {
throw new IllegalArgumentException("Nested type should be repeated: " + elementType);
}
return new GroupType(repetition, alias, originalType, nested);
// Builder form: the annotation is attached with as(...) instead of being passed as an OriginalType.
return Types.buildGroup(repetition)
.as(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())
.addField(elementType)
.named(name);
}

// NOTE(review): rendered diff hunk without +/- markers. The signature taking an OriginalType and
// the "new GroupType(...)" return appear to be the removed lines; each is followed by its replacement.
/**
 * Wraps a type in an OPTIONAL group annotated with LogicalTypeAnnotation.listType().
 *
 * @param name name of the resulting group
 * @param elementType field added to the group
 * @return the OPTIONAL list-annotated group
 */
private static GroupType listWrapper(String name, OriginalType originalType, GroupType groupType)
private static GroupType listWrapper(String name, Type elementType)
{
return new GroupType(Repetition.OPTIONAL, name, originalType, groupType);
return Types.optionalGroup().as(LogicalTypeAnnotation.listType()).addField(elementType).named(name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
Expand All @@ -34,7 +34,6 @@
import java.util.Locale;

import static com.google.common.base.Preconditions.checkState;
import static org.apache.parquet.schema.OriginalType.MAP_KEY_VALUE;

/**
* This class is copied from org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter
Expand Down Expand Up @@ -70,7 +69,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet
{
if (typeInfo.getCategory() == Category.PRIMITIVE) {
if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType())
.named(name);
}
if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
Expand Down Expand Up @@ -102,29 +101,29 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet
if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.CHAR_TYPE_NAME)) {
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.VARCHAR_TYPE_NAME)) {
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
int prec = decimalTypeInfo.precision();
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
}
return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
}
if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name);
}
if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
Expand All @@ -150,7 +149,7 @@ private static Type convertType(String name, TypeInfo typeInfo, Repetition repet
// NOTE(review): rendered diff hunk without +/- markers; the return that passes OriginalType.LIST
// appears to be the removed line, immediately followed by its replacement.
/**
 * Builds the Parquet group for a Hive list type.
 * The element type is converted as a REPEATED field named "array_element" and passed to
 * listWrapper together with the requested repetition.
 *
 * @param name name given to the resulting list group
 * @param typeInfo Hive list type; its element type is read via getListElementTypeInfo()
 * @param repetition repetition forwarded to listWrapper for the resulting group
 * @return the group produced by listWrapper
 */
private static GroupType convertArrayType(String name, ListTypeInfo typeInfo, Repetition repetition)
{
TypeInfo subType = typeInfo.getListElementTypeInfo();
return listWrapper(name, OriginalType.LIST, convertType("array_element", subType, Repetition.REPEATED), repetition);
return listWrapper(name, convertType("array_element", subType, Repetition.REPEATED), repetition);
}

// An optional group containing multiple elements
Expand All @@ -176,39 +175,40 @@ public static GroupType mapType(Repetition repetition, String alias, String mapA
{
//support projection only on key of a map
if (valueType == null) {
return listWrapper(
repetition,
return mapKvWrapper(
alias,
MAP_KEY_VALUE,
new GroupType(
Repetition.REPEATED,
mapAlias,
keyType));
keyType),
repetition);
}
if (!valueType.getName().equals("value")) {
throw new RuntimeException(valueType.getName() + " should be value");
}
return listWrapper(
repetition,
return mapKvWrapper(
alias,
MAP_KEY_VALUE,
new GroupType(
Repetition.REPEATED,
mapAlias,
keyType,
valueType));
valueType),
repetition);
}

// NOTE(review): rendered diff hunk without +/- markers. The first signature (listWrapper with an
// OriginalType parameter), the check on "nested" and the "new GroupType(...)" return appear to be
// the removed lines; the Javadoc below describes the mapKvWrapper lines that replace them.
/**
 * Wraps a repeated type in a group annotated with MapKeyValueTypeAnnotation.
 *
 * @param name name of the resulting group
 * @param elementType field added to the group; must have REPEATED repetition
 * @param repetition repetition of the resulting group
 * @return the annotated group with elementType added as a field
 * @throws IllegalArgumentException if elementType is not REPEATED
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested)
private static GroupType mapKvWrapper(String name, Type elementType, Repetition repetition)
{
if (!nested.isRepetition(Repetition.REPEATED)) {
throw new IllegalArgumentException("Nested type should be repeated: " + nested);
if (!elementType.isRepetition(Repetition.REPEATED)) {
throw new IllegalArgumentException("Nested type should be repeated: " + elementType);
}
return new GroupType(repetition, alias, originalType, nested);
// Builder form: the annotation is attached with as(...) instead of being passed as an OriginalType.
return Types.buildGroup(repetition)
.as(LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance())
.addField(elementType)
.named(name);
}

// NOTE(review): rendered diff hunk without +/- markers. The signature taking an OriginalType and
// the "new GroupType(...)" return appear to be the removed lines; each is followed by its replacement.
/**
 * Wraps a type in a group annotated with LogicalTypeAnnotation.listType().
 *
 * @param name name of the resulting group
 * @param elementType field added to the group
 * @param repetition repetition of the resulting group
 * @return the list-annotated group
 */
private static GroupType listWrapper(String name, OriginalType originalType, Type elementType, Repetition repetition)
private static GroupType listWrapper(String name, Type elementType, Repetition repetition)
{
return new GroupType(repetition, name, originalType, elementType);
return Types.buildGroup(repetition).as(LogicalTypeAnnotation.listType()).addField(elementType).named(name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.parquet.schema.ConversionPatterns;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
Expand Down Expand Up @@ -71,7 +71,7 @@ private static Type convertType(String name, TypeInfo typeInfo,
{
if (typeInfo.getCategory() == Category.PRIMITIVE) {
if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(LogicalTypeAnnotation.stringType())
.named(name);
}
else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
Expand Down Expand Up @@ -103,19 +103,19 @@ else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.CHAR_TYPE_NAME)) {
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
else {
return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
}
else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
serdeConstants.VARCHAR_TYPE_NAME)) {
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
else {
return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
return Types.repeated(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named(name);
}
}
else if (typeInfo instanceof DecimalTypeInfo) {
Expand All @@ -124,14 +124,14 @@ else if (typeInfo instanceof DecimalTypeInfo) {
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
if (repetition == Repetition.OPTIONAL) {
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
}
else {
return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
}
}
else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
return Types.primitive(PrimitiveTypeName.INT32, repetition).as(LogicalTypeAnnotation.dateType()).named(name);
}
else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
Expand Down Expand Up @@ -161,7 +161,7 @@ else if (typeInfo.getCategory() == Category.UNION) {
// NOTE(review): rendered diff hunk without +/- markers; the return that passes OriginalType.LIST
// appears to be the removed line, immediately followed by its replacement.
/**
 * Builds the Parquet group for a Hive list type.
 * The element type is converted as a REPEATED field named "array" and passed to listWrapper
 * together with the requested repetition.
 *
 * @param name name given to the resulting list group
 * @param typeInfo Hive list type; its element type is read via getListElementTypeInfo()
 * @param repetition repetition forwarded to listWrapper for the resulting group
 * @return the group produced by listWrapper
 */
private static GroupType convertArrayType(String name, ListTypeInfo typeInfo, Repetition repetition)
{
TypeInfo subType = typeInfo.getListElementTypeInfo();
return listWrapper(name, OriginalType.LIST, convertType("array", subType, Repetition.REPEATED), repetition);
return listWrapper(name, convertType("array", subType, Repetition.REPEATED), repetition);
}

// An optional group containing multiple elements
Expand All @@ -183,9 +183,8 @@ private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repet
return ConversionPatterns.mapType(repetition, name, keyType, valueType);
}

// NOTE(review): rendered diff hunk without +/- markers. The two-line signature taking an
// OriginalType and the "new GroupType(...)" return appear to be the removed lines; each is
// followed by its replacement.
/**
 * Wraps a type in a group annotated with LogicalTypeAnnotation.listType().
 *
 * @param name name of the resulting group
 * @param elementType field added to the group
 * @param repetition repetition of the resulting group
 * @return the list-annotated group
 */
private static GroupType listWrapper(String name, OriginalType originalType,
Type elementType, Repetition repetition)
private static GroupType listWrapper(String name, Type elementType, Repetition repetition)
{
return new GroupType(repetition, name, originalType, elementType);
return Types.buildGroup(repetition).as(LogicalTypeAnnotation.listType()).addField(elementType).named(name);
}
}
Loading

0 comments on commit 599bc28

Please sign in to comment.