
Forbid fields/variables differing in case only
The downside, and biggest change here, is that Hive SerDe variables need
to be written as `serde` and not `serDe`. Still, the camel-case of this
specific word was applied very inconsistently, so the unification is
worth the change.
findepi committed Jan 17, 2022
1 parent adb4c35 commit 4d0d1cf
Showing 34 changed files with 124 additions and 124 deletions.
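For context, here is a minimal, hypothetical Java sketch of the pattern this commit forbids — two identifiers in the same class that differ only in case. The class and method names below are invented for illustration; the check that enforces the rule is not part of the hunks shown here.

```java
// Hypothetical illustration (not code from this commit): identifiers
// that differ only in case, the pattern now forbidden.
public class StorageDescriptor
{
    private final String serDe;            // the field uses one casing...

    public StorageDescriptor(String serde) // ...the parameter another; they differ only in case
    {
        this.serDe = serde;                // legal Java, but `serDe` and `serde` are easy to confuse
    }

    public String serializationLib()
    {
        return serDe;
    }
}
```

Unifying on a single spelling — this commit settles on `serde`, as in the `getSerDe()` → `getSerde()` renames below — removes that whole class of mix-ups.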
@@ -79,7 +79,7 @@ public static class Context
public int rowsPerPartition;

@Param({"0", "1", "2", "3"})
-public int numberOfPregroupedColumns;
+public int numberOfPreGroupedColumns;

@Param({"10", "50", "100"})
public int partitionsPerGroup;
@@ -96,7 +96,7 @@ public void setup()
executor = newCachedThreadPool(daemonThreadsNamed(getClass().getSimpleName() + "-%s"));
scheduledExecutor = newScheduledThreadPool(2, daemonThreadsNamed(getClass().getSimpleName() + "-scheduledExecutor-%s"));

-createOperatorFactoryAndGenerateTestData(numberOfPregroupedColumns);
+createOperatorFactoryAndGenerateTestData(numberOfPreGroupedColumns);
}

@TearDown
@@ -311,7 +311,7 @@ private void verify(
Context context = new Context();

context.rowsPerPartition = numberOfRowsPerPartition;
-context.numberOfPregroupedColumns = numberOfPreGroupedColumns;
+context.numberOfPreGroupedColumns = numberOfPreGroupedColumns;

if (useSinglePartition) {
context.partitionsPerGroup = 1;
@@ -1588,15 +1588,15 @@ private void createEmptyFiles(ConnectorSession session, Path path, Table table,
}
hdfsEnvironment.doAs(session.getIdentity(), () -> {
for (String fileName : fileNames) {
-writeEmptyFile(session, new Path(path, fileName), conf, schema, format.getSerDe(), format.getOutputFormat());
+writeEmptyFile(session, new Path(path, fileName), conf, schema, format.getSerde(), format.getOutputFormat());
}
});
}

-private static void writeEmptyFile(ConnectorSession session, Path target, JobConf conf, Properties properties, String serDe, String outputFormatName)
+private static void writeEmptyFile(ConnectorSession session, Path target, JobConf conf, Properties properties, String serde, String outputFormatName)
{
// Some serializers such as Avro set a property in the schema.
-initializeSerializer(conf, properties, serDe);
+initializeSerializer(conf, properties, serde);

// The code below is not a try with resources because RecordWriter is not Closeable.
FileSinkOperator.RecordWriter recordWriter = HiveWriteUtils.createRecordWriter(target, conf, properties, outputFormatName, session);
@@ -3135,10 +3135,10 @@ private static HiveStorageFormat extractHiveStorageFormat(Table table)
{
StorageFormat storageFormat = table.getStorage().getStorageFormat();
String outputFormat = storageFormat.getOutputFormat();
-String serde = storageFormat.getSerDe();
+String serde = storageFormat.getSerde();

for (HiveStorageFormat format : HiveStorageFormat.values()) {
-if (format.getOutputFormat().equals(outputFormat) && format.getSerDe().equals(serde)) {
+if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serde)) {
return format;
}
}
@@ -116,7 +116,7 @@ public enum HiveStorageFormat
this.estimatedWriterSystemMemoryUsage = requireNonNull(estimatedWriterSystemMemoryUsage, "estimatedWriterSystemMemoryUsage is null");
}

-public String getSerDe()
+public String getSerde()
{
return serde;
}
@@ -168,16 +168,16 @@ else if (type.getCategory() == Category.PRIMITIVE) {
}

private static final Map<SerdeAndInputFormat, HiveStorageFormat> HIVE_STORAGE_FORMAT_FROM_STORAGE_FORMAT = Arrays.stream(HiveStorageFormat.values())
-.collect(toImmutableMap(format -> new SerdeAndInputFormat(format.getSerDe(), format.getInputFormat()), identity()));
+.collect(toImmutableMap(format -> new SerdeAndInputFormat(format.getSerde(), format.getInputFormat()), identity()));

private static final class SerdeAndInputFormat
{
-private final String serDe;
+private final String serde;
private final String inputFormat;

-public SerdeAndInputFormat(String serDe, String inputFormat)
+public SerdeAndInputFormat(String serde, String inputFormat)
{
-this.serDe = serDe;
+this.serde = serde;
this.inputFormat = inputFormat;
}

@@ -191,19 +191,19 @@ public boolean equals(Object o)
return false;
}
SerdeAndInputFormat that = (SerdeAndInputFormat) o;
-return serDe.equals(that.serDe) && inputFormat.equals(that.inputFormat);
+return serde.equals(that.serde) && inputFormat.equals(that.inputFormat);
}

@Override
public int hashCode()
{
-return Objects.hash(serDe, inputFormat);
+return Objects.hash(serde, inputFormat);
}
}

public static Optional<HiveStorageFormat> getHiveStorageFormat(StorageFormat storageFormat)
{
-return Optional.ofNullable(HIVE_STORAGE_FORMAT_FROM_STORAGE_FORMAT.get(new SerdeAndInputFormat(storageFormat.getSerDe(), storageFormat.getInputFormat())));
+return Optional.ofNullable(HIVE_STORAGE_FORMAT_FROM_STORAGE_FORMAT.get(new SerdeAndInputFormat(storageFormat.getSerde(), storageFormat.getInputFormat())));
}

private static PrimitiveTypeInfo primitiveTypeInfo(TypeInfo typeInfo)
@@ -179,7 +179,7 @@ public static boolean isSupportedType(TypeInfo typeInfo, StorageFormat storageFo
// 3. The Parquet format doesn't support uniontypes itself so there's no need to add support for it in Trino.
// 4. TODO: RCFile tables are not supported yet.
// 5. TODO: The support for Avro is done in SerDeUtils so it's possible that formats other than Avro are also supported. But verification is needed.
-if (storageFormat.getSerDe().equalsIgnoreCase(AVRO.getSerDe()) || storageFormat.getSerDe().equalsIgnoreCase(ORC.getSerDe())) {
+if (storageFormat.getSerde().equalsIgnoreCase(AVRO.getSerde()) || storageFormat.getSerde().equalsIgnoreCase(ORC.getSerde())) {
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
return unionTypeInfo.getAllUnionObjectTypeInfos().stream()
.allMatch(fieldTypeInfo -> isSupportedType(fieldTypeInfo, storageFormat));
@@ -108,10 +108,10 @@ public Optional<FileWriter> createFileWriter(
}

RcFileEncoding rcFileEncoding;
-if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
+if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
rcFileEncoding = new BinaryRcFileEncoding(timeZone);
}
-else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
+else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
rcFileEncoding = createTextVectorEncoding(schema);
}
else {
@@ -99,8 +99,8 @@ public RecordFileWriter(

fieldCount = fileColumnNames.size();

-String serDe = storageFormat.getSerDe();
-serializer = initializeSerializer(conf, schema, serDe);
+String serde = storageFormat.getSerde();
+serializer = initializeSerializer(conf, schema, serde);

List<ObjectInspector> objectInspectors = getRowColumnInspectors(fileColumnTypes);
tableInspector = getStandardStructObjectInspector(fileColumnNames, objectInspectors);
@@ -175,10 +175,10 @@ public static class HiveViewReader
private final HiveMetastoreClient metastoreClient;
private final TypeManager typeManager;

-public HiveViewReader(HiveMetastoreClient hiveMetastoreClient, TypeManager typemanager)
+public HiveViewReader(HiveMetastoreClient hiveMetastoreClient, TypeManager typeManager)
{
this.metastoreClient = requireNonNull(hiveMetastoreClient, "hiveMetastoreClient is null");
-this.typeManager = requireNonNull(typemanager, "typeManager is null");
+this.typeManager = requireNonNull(typeManager, "typeManager is null");
}

@Override
@@ -149,7 +149,7 @@ private static Properties getHiveSchema(
for (Map.Entry<String, String> param : sd.getSerdeParameters().entrySet()) {
schema.setProperty(param.getKey(), (param.getValue() != null) ? param.getValue() : "");
}
-schema.setProperty(SERIALIZATION_LIB, sd.getStorageFormat().getSerDe());
+schema.setProperty(SERIALIZATION_LIB, sd.getStorageFormat().getSerde());

StringBuilder columnNameBuilder = new StringBuilder();
StringBuilder columnTypeBuilder = new StringBuilder();
@@ -217,7 +217,7 @@ public static ProtectMode getProtectMode(Table table)

public static boolean isAvroTableWithSchemaSet(Table table)
{
-return AVRO.getSerDe().equals(table.getStorage().getStorageFormat().getSerDeNullable()) &&
+return AVRO.getSerde().equals(table.getStorage().getStorageFormat().getSerDeNullable()) &&
(table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null ||
(table.getStorage().getSerdeParameters().get(AVRO_SCHEMA_URL_KEY) != null));
}
@@ -32,23 +32,23 @@ public class StorageFormat
{
public static final StorageFormat VIEW_STORAGE_FORMAT = createNullable(null, null, null);

-private final String serDe;
+private final String serde;
private final String inputFormat;
private final String outputFormat;

-private StorageFormat(String serDe, String inputFormat, String outputFormat)
+private StorageFormat(String serde, String inputFormat, String outputFormat)
{
-this.serDe = serDe;
+this.serde = serde;
this.inputFormat = inputFormat;
this.outputFormat = outputFormat;
}

-public String getSerDe()
+public String getSerde()
{
-if (serDe == null) {
+if (serde == null) {
throw new TrinoException(HIVE_INVALID_METADATA, "SerDe is not present in StorageFormat");
}
-return serDe;
+return serde;
}

public String getInputFormat()
@@ -67,10 +67,10 @@ public String getOutputFormat()
return outputFormat;
}

@JsonProperty("serDe")
@JsonProperty("serde")
public String getSerDeNullable()
{
-return serDe;
+return serde;
}

@JsonProperty("inputFormat")
@@ -87,24 +87,24 @@ public String getOutputFormatNullable()

public static StorageFormat fromHiveStorageFormat(HiveStorageFormat hiveStorageFormat)
{
-return new StorageFormat(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat());
+return new StorageFormat(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat());
}

public static StorageFormat create(String serde, String inputFormat, String outputFormat)
{
return new StorageFormat(
requireNonNull(serde, "serDe is null"),
requireNonNull(serde, "serde is null"),
requireNonNull(inputFormat, "inputFormat is null"),
requireNonNull(outputFormat, "outputFormat is null"));
}

@JsonCreator
public static StorageFormat createNullable(
@JsonProperty("serDe") String serDe,
@JsonProperty("serde") String serde,
@JsonProperty("inputFormat") String inputFormat,
@JsonProperty("outputFormat") String outputFormat)
{
-return new StorageFormat(serDe, inputFormat, outputFormat);
+return new StorageFormat(serde, inputFormat, outputFormat);
}

@Override
@@ -117,22 +117,22 @@ public boolean equals(Object o)
return false;
}
StorageFormat that = (StorageFormat) o;
-return Objects.equals(serDe, that.serDe) &&
+return Objects.equals(serde, that.serde) &&
Objects.equals(inputFormat, that.inputFormat) &&
Objects.equals(outputFormat, that.outputFormat);
}

@Override
public int hashCode()
{
-return Objects.hash(serDe, inputFormat, outputFormat);
+return Objects.hash(serde, inputFormat, outputFormat);
}

@Override
public String toString()
{
return toStringHelper(this)
.add("serDe", serDe)
.add("serde", serde)
.add("inputFormat", inputFormat)
.add("outputFormat", outputFormat)
.toString();
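One side effect worth noting from the `StorageFormat` hunks above: the Jackson annotation moves from `@JsonProperty("serDe")` to `@JsonProperty("serde")`, so the field name in serialized JSON changes as well. Below is a minimal, self-contained sketch of that effect, using a hypothetical stand-in record rather than Trino's actual class (assumes a Jackson version with record support, 2.12+).

```java
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;

public class SerdeJsonNameDemo
{
    // Stand-in for the renamed property; the real StorageFormat carries more fields.
    public record Format(@JsonProperty("serde") String serde) {}

    public static void main(String[] args) throws Exception
    {
        // Prints {"serde":"..."}. Under the old @JsonProperty("serDe") annotation the
        // same object serialized as {"serDe":"..."}, so consumers matching on the old
        // JSON field name would need updating.
        System.out.println(new ObjectMapper().writeValueAsString(
                new Format("org.apache.hadoop.hive.serde2.OpenCSVSerde")));
    }
}
```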
@@ -94,11 +94,11 @@ public static Table convertTable(com.amazonaws.services.glue.model.Table glueTab
return tableBuilder.build();
}

-private static Column convertColumn(com.amazonaws.services.glue.model.Column glueColumn, String serDe)
+private static Column convertColumn(com.amazonaws.services.glue.model.Column glueColumn, String serde)
{
// OpenCSVSerde deserializes columns from csv file into strings, so we set the column type from the metastore
// to string to avoid cast exceptions.
-if (HiveStorageFormat.CSV.getSerDe().equals(serDe)) {
+if (HiveStorageFormat.CSV.getSerde().equals(serde)) {
//TODO(https://github.com/trinodb/trino/issues/7240) Add tests
return new Column(glueColumn.getName(), HiveType.HIVE_STRING, Optional.ofNullable(glueColumn.getComment()));
}
@@ -107,9 +107,9 @@ private static Column convertColumn(com.amazonaws.services.glue.model.Column glu
}
}

-private static List<Column> convertColumns(List<com.amazonaws.services.glue.model.Column> glueColumns, String serDe)
+private static List<Column> convertColumns(List<com.amazonaws.services.glue.model.Column> glueColumns, String serde)
{
-return mappedCopy(glueColumns, glueColumn -> convertColumn(glueColumn, serDe));
+return mappedCopy(glueColumns, glueColumn -> convertColumn(glueColumn, serde));
}

private static Map<String, String> convertParameters(Map<String, String> parameters)
@@ -147,7 +147,7 @@ public GluePartitionConverter(Table table)
this.tableName = requireNonNull(table.getTableName(), "tableName is null");
this.tableParameters = convertParameters(table.getParameters());
this.columnsConverter = memoizeLast(glueColumns -> convertColumns(glueColumns,
-table.getStorage().getStorageFormat().getSerDe()));
+table.getStorage().getStorageFormat().getSerde()));
}

@Override
@@ -417,12 +417,12 @@ public static boolean isAvroTableWithSchemaSet(org.apache.hadoop.hive.metastore.
return serdeInfo.getSerializationLib() != null &&
(table.getParameters().get(AVRO_SCHEMA_URL_KEY) != null ||
(serdeInfo.getParameters() != null && serdeInfo.getParameters().get(AVRO_SCHEMA_URL_KEY) != null)) &&
-serdeInfo.getSerializationLib().equals(AVRO.getSerDe());
+serdeInfo.getSerializationLib().equals(AVRO.getSerde());
}

public static boolean isCsvTable(org.apache.hadoop.hive.metastore.api.Table table)
{
-return CSV.getSerDe().equals(getSerdeInfo(table).getSerializationLib());
+return CSV.getSerde().equals(getSerdeInfo(table).getSerializationLib());
}

public static List<FieldSchema> csvSchemaFields(List<FieldSchema> schemas)
@@ -326,14 +326,14 @@ public static Optional<CompressionCodec> getCompressionCodec(TextInputFormat inp

Class<? extends InputFormat<?, ?>> inputFormatClass = getInputFormatClass(jobConf, inputFormatName);
if (symlinkTarget && inputFormatClass == SymlinkTextInputFormat.class) {
-String serDe = getDeserializerClassName(schema);
+String serde = getDeserializerClassName(schema);
for (HiveStorageFormat format : HiveStorageFormat.values()) {
-if (serDe.equals(format.getSerDe())) {
+if (serde.equals(format.getSerde())) {
inputFormatClass = getInputFormatClass(jobConf, format.getInputFormat());
return ReflectionUtils.newInstance(inputFormatClass, jobConf);
}
}
-throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Unknown SerDe for SymlinkTextInputFormat: " + serDe);
+throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Unknown SerDe for SymlinkTextInputFormat: " + serde);
}

return ReflectionUtils.newInstance(inputFormatClass, jobConf);
@@ -171,7 +171,6 @@
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
-import static com.google.common.collect.ImmutableMap.builder;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Iterables.concat;
@@ -5149,7 +5148,7 @@ protected void createEmptyTable(

tableBuilder.getStorageBuilder()
.setLocation(targetPath.toString())
-.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat()))
+.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat()))
.setBucketProperty(bucketProperty)
.setSerdeParameters(ImmutableMap.of());

@@ -608,7 +608,7 @@ public static FileSplit createTestFileHive(
throws Exception
{
HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
-Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);
+Serializer serializer = newInstance(storageFormat.getSerde(), Serializer.class);

// filter out partition keys, which are not written to the file
testColumns = testColumns.stream()
@@ -251,7 +251,7 @@ private void createExternalTable(SchemaTableName schemaTableName, HiveStorageFor

tableBuilder.getStorageBuilder()
.setLocation(externalLocation.toString())
-.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerDe(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat()))
+.setStorageFormat(StorageFormat.create(hiveStorageFormat.getSerde(), hiveStorageFormat.getInputFormat(), hiveStorageFormat.getOutputFormat()))
.setBucketProperty(bucketProperty)
.setSerdeParameters(ImmutableMap.of());

@@ -862,7 +862,7 @@ public void testBuildManifestFileIterator()
CachingDirectoryLister directoryLister = new CachingDirectoryLister(new Duration(5, TimeUnit.MINUTES), 1000, ImmutableList.of());
Properties schema = new Properties();
schema.setProperty(FILE_INPUT_FORMAT, SymlinkTextInputFormat.class.getName());
-schema.setProperty(SERIALIZATION_LIB, AVRO.getSerDe());
+schema.setProperty(SERIALIZATION_LIB, AVRO.getSerde());

Path firstFilePath = new Path("hdfs://VOL1:9000/db_name/table_name/file1");
Path secondFilePath = new Path("hdfs://VOL1:9000/db_name/table_name/file2");
@@ -900,7 +900,7 @@ public void testBuildManifestFileIteratorNestedDirectory()
CachingDirectoryLister directoryLister = new CachingDirectoryLister(new Duration(5, TimeUnit.MINUTES), 1000, ImmutableList.of());
Properties schema = new Properties();
schema.setProperty(FILE_INPUT_FORMAT, SymlinkTextInputFormat.class.getName());
-schema.setProperty(SERIALIZATION_LIB, AVRO.getSerDe());
+schema.setProperty(SERIALIZATION_LIB, AVRO.getSerde());

Path filePath = new Path("hdfs://VOL1:9000/db_name/table_name/file1");
Path directoryPath = new Path("hdfs://VOL1:9000/db_name/table_name/dir");
