Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove duplicate getParquetType method from ParquetPageSourceFactory #16699

Merged
merged 1 commit into from
Mar 25, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -329,21 +329,9 @@ public static Optional<MessageType> getParquetMessageType(List<HiveColumnHandle>
return message;
}

/**
 * Looks up the Parquet type that backs the base column of {@code column} inside {@code groupType}.
 *
 * @param groupType the Parquet group whose fields are searched
 * @param useParquetColumnNames when {@code true}, the field is resolved through
 *         {@code getParquetTypeByName} using the column's base name; when {@code false},
 *         the column's base Hive column index is used as the field position
 * @param column the Hive column handle whose base column is being resolved
 * @return the matching Parquet type, or an empty {@code Optional} when the name lookup
 *         yields {@code null} or the index is not smaller than the group's field count
 */
public static Optional<org.apache.parquet.schema.Type> getParquetType(GroupType groupType, boolean useParquetColumnNames, HiveColumnHandle column)
{
    if (!useParquetColumnNames) {
        // Positional resolution: the field must exist at the column's base index
        int fieldIndex = column.getBaseHiveColumnIndex();
        if (fieldIndex >= groupType.getFieldCount()) {
            return Optional.empty();
        }
        return Optional.of(groupType.getType(fieldIndex));
    }

    // Name-based resolution: a null lookup result is mapped to an empty Optional
    return Optional.ofNullable(getParquetTypeByName(column.getBaseColumnName(), groupType));
}

public static Optional<org.apache.parquet.schema.Type> getColumnType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
{
Optional<org.apache.parquet.schema.Type> columnType = getParquetType(messageType, useParquetColumnNames, column);
Optional<org.apache.parquet.schema.Type> columnType = getBaseColumnParquetType(column, messageType, useParquetColumnNames);
if (columnType.isEmpty() || column.getHiveColumnProjectionInfo().isEmpty()) {
return columnType;
}
Expand Down Expand Up @@ -453,13 +441,13 @@ public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(
descriptor = descriptorsByPath.get(ImmutableList.of(columnHandle.getName()));
}
else {
org.apache.parquet.schema.Type parquetField = getParquetType(columnHandle, fileSchema, false);
if (parquetField == null || !parquetField.isPrimitive()) {
Optional<org.apache.parquet.schema.Type> parquetField = getBaseColumnParquetType(columnHandle, fileSchema, false);
if (parquetField.isEmpty() || !parquetField.get().isPrimitive()) {
// Parquet file has fewer columns than the partition
// Or the field is a complex type
continue;
}
descriptor = descriptorsByPath.get(ImmutableList.of(parquetField.getName()));
descriptor = descriptorsByPath.get(ImmutableList.of(parquetField.get().getName()));
}
if (descriptor != null) {
predicate.put(descriptor, entry.getValue());
Expand All @@ -468,18 +456,6 @@ public static TupleDomain<ColumnDescriptor> getParquetTupleDomain(
return TupleDomain.withColumnDomains(predicate.buildOrThrow());
}

/**
 * Looks up the Parquet type that backs the base column of {@code column} inside {@code messageType}.
 *
 * @param column the Hive column handle whose base column is being resolved
 * @param messageType the Parquet schema whose top-level fields are searched
 * @param useParquetColumnNames when {@code true}, the field is resolved through
 *         {@code getParquetTypeByName} using the column's base name; when {@code false},
 *         the column's base Hive column index is used as the field position
 * @return the matching Parquet type; {@code null} when resolving by index and the index is
 *         not smaller than the schema's field count, otherwise whatever
 *         {@code getParquetTypeByName} returns
 */
public static org.apache.parquet.schema.Type getParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
{
    if (!useParquetColumnNames) {
        // Positional resolution: null signals that the schema has no field at this index
        int fieldIndex = column.getBaseHiveColumnIndex();
        return fieldIndex < messageType.getFieldCount() ? messageType.getType(fieldIndex) : null;
    }

    return getParquetTypeByName(column.getBaseColumnName(), messageType);
}

public interface ParquetReaderProvider
{
ParquetReader createParquetReader(List<Field> fields)
Expand All @@ -503,8 +479,8 @@ public static ConnectorPageSource createParquetPageSource(
continue;
}
checkArgument(column.getColumnType() == REGULAR, "column type must be REGULAR: %s", column);
org.apache.parquet.schema.Type parquetType = getParquetType(column, fileSchema, useColumnNames);
if (parquetType == null) {
Optional<org.apache.parquet.schema.Type> parquetType = getBaseColumnParquetType(column, fileSchema, useColumnNames);
if (parquetType.isEmpty()) {
pageSourceBuilder.addNullColumn(column.getBaseType());
continue;
}
Expand All @@ -521,4 +497,16 @@ public static ConnectorPageSource createParquetPageSource(

return pageSourceBuilder.build(parquetReaderProvider.createParquetReader(parquetColumnFieldsBuilder.build()));
}

/**
 * Resolves the Parquet type of the base column of {@code column} within {@code messageType}.
 *
 * @param column the Hive column handle whose base column is being resolved
 * @param messageType the Parquet schema whose top-level fields are searched
 * @param useParquetColumnNames {@code true} to resolve by the column's base name via
 *         {@code getParquetTypeByName}; {@code false} to resolve by the column's base
 *         Hive column index
 * @return the resolved type, or an empty {@code Optional} when the name lookup returns
 *         {@code null} or the index is not smaller than the schema's field count
 */
private static Optional<org.apache.parquet.schema.Type> getBaseColumnParquetType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames)
{
    if (useParquetColumnNames) {
        org.apache.parquet.schema.Type typeByName = getParquetTypeByName(column.getBaseColumnName(), messageType);
        return Optional.ofNullable(typeByName);
    }

    int baseIndex = column.getBaseHiveColumnIndex();
    boolean fieldPresent = baseIndex < messageType.getFieldCount();
    if (!fieldPresent) {
        return Optional.empty();
    }
    return Optional.of(messageType.getType(baseIndex));
}
}