From 527dd0d2afb2e5d3ac84eda32b0ebf12fe5e34fa Mon Sep 17 00:00:00 2001 From: Raunaq Morarka Date: Sun, 8 Sep 2024 13:15:16 +0530 Subject: [PATCH] Avoid extra TrinoInputFile#lastModified calls in hive filesystem cache --- .../trino/plugin/hive/HivePageSourceFactory.java | 1 + .../trino/plugin/hive/HivePageSourceProvider.java | 3 +++ .../plugin/hive/avro/AvroPageSourceFactory.java | 1 + .../plugin/hive/line/LinePageSourceFactory.java | 1 + .../plugin/hive/orc/OrcPageSourceFactory.java | 6 +++++- .../hive/parquet/ParquetPageSourceFactory.java | 4 +++- .../hive/rcfile/RcFilePageSourceFactory.java | 1 + .../hive/TestHiveAlluxioCacheFileOperations.java | 14 -------------- .../io/trino/plugin/hive/TestHiveFileFormats.java | 1 + .../hive/TestOrcPageSourceMemoryTracking.java | 1 + .../plugin/hive/orc/TestOrcPageSourceFactory.java | 1 + .../trino/plugin/hive/orc/TestOrcPredicates.java | 5 ++++- .../io/trino/plugin/hive/parquet/ParquetUtil.java | 1 + 13 files changed, 23 insertions(+), 17 deletions(-) diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java index 344063332a3b..52dfabaf2e08 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceFactory.java @@ -31,6 +31,7 @@ Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java index a0b815a41b88..325f2292da83 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/HivePageSourceProvider.java @@ -144,6 +144,7 @@ public ConnectorPageSource createPageSource( hiveSplit.getStart(), hiveSplit.getLength(), hiveSplit.getEstimatedFileSize(), + hiveSplit.getFileModifiedTime(), hiveSplit.getSchema(), hiveTable.getCompactEffectivePredicate().intersect( dynamicFilter.getCurrentPredicate().transformKeys(HiveColumnHandle.class::cast)) @@ -175,6 +176,7 @@ public static Optional createHivePageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, TupleDomain effectivePredicate, TypeManager typeManager, @@ -205,6 +207,7 @@ public static Optional createHivePageSource( start, length, estimatedFileSize, + fileModifiedTime, schema, desiredColumns, effectivePredicate, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java index 525654d81ef4..ac58e390e562 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/avro/AvroPageSourceFactory.java @@ -87,6 +87,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java index 796d46bce451..7498bdc9a87b 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/line/LinePageSourceFactory.java @@ -82,6 +82,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java index da7a6c1401ef..68f4fde033be 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/orc/OrcPageSourceFactory.java @@ -55,6 +55,7 @@ import org.joda.time.DateTimeZone; import java.io.IOException; +import java.time.Instant; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -173,6 +174,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, @@ -200,6 +202,7 @@ public Optional createPageSource( start, length, estimatedFileSize, + fileModifiedTime, readerColumnHandles, columns, isUseOrcColumnNames(session), @@ -230,6 +233,7 @@ private ConnectorPageSource createOrcPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, List columns, List projections, boolean useOrcColumnNames, @@ -253,7 +257,7 @@ private ConnectorPageSource createOrcPageSource( boolean originalFilesPresent = acidInfo.isPresent() && !acidInfo.get().getOriginalFiles().isEmpty(); try { TrinoFileSystem fileSystem = fileSystemFactory.create(session); - TrinoInputFile inputFile = fileSystem.newInputFile(path, estimatedFileSize); + TrinoInputFile inputFile = fileSystem.newInputFile(path, estimatedFileSize, Instant.ofEpochMilli(fileModifiedTime)); orcDataSource = new HdfsOrcDataSource( new OrcDataSourceId(path.toString()), estimatedFileSize, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java index f16399c17d38..dcd88f2523d6 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/parquet/ParquetPageSourceFactory.java @@ -61,6 +61,7 @@ import org.joda.time.DateTimeZone; import java.io.IOException; +import java.time.Instant; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -160,6 +161,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, @@ -175,7 +177,7 @@ public Optional createPageSource( checkArgument(acidInfo.isEmpty(), "Acid is not supported"); TrinoFileSystem fileSystem = fileSystemFactory.create(session); - TrinoInputFile inputFile = fileSystem.newInputFile(path, estimatedFileSize); + TrinoInputFile inputFile = fileSystem.newInputFile(path, estimatedFileSize, Instant.ofEpochMilli(fileModifiedTime)); return Optional.of(createPageSource( inputFile, diff --git a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java index 4ca50893bc93..3fe0d461804e 100644 --- a/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java +++ b/plugin/trino-hive/src/main/java/io/trino/plugin/hive/rcfile/RcFilePageSourceFactory.java @@ -94,6 +94,7 @@ public Optional createPageSource( long start, long length, long estimatedFileSize, + long fileModifiedTime, Map schema, List columns, TupleDomain effectivePredicate, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAlluxioCacheFileOperations.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAlluxioCacheFileOperations.java index ad42ed7e328f..0d8b3955dd7a 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAlluxioCacheFileOperations.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAlluxioCacheFileOperations.java @@ -84,16 +84,12 @@ public void testCacheFileOperations() .add(new CacheOperation("Input.readFully", "key=p2/")) .add(new CacheOperation("Alluxio.writeCache", "key=p1/")) .add(new CacheOperation("Alluxio.writeCache", "key=p2/")) - .add(new CacheOperation("InputFile.lastModified", "key=p1/")) - .add(new CacheOperation("InputFile.lastModified", "key=p2/")) .build()); assertFileSystemAccesses( "SELECT * FROM test_cache_file_operations", ImmutableMultiset.builder() .add(new CacheOperation("Alluxio.readCached", "key=p1/")) .add(new CacheOperation("Alluxio.readCached", "key=p2/")) - .add(new CacheOperation("InputFile.lastModified", "key=p1/")) - .add(new CacheOperation("InputFile.lastModified", "key=p2/")) .build()); assertUpdate("INSERT INTO test_cache_file_operations VALUES ('3-xyz', 'p3')", 1); assertUpdate("INSERT INTO test_cache_file_operations VALUES ('4-xyz', 'p4')", 1); @@ -112,11 +108,6 @@ public void testCacheFileOperations() .add(new CacheOperation("Alluxio.writeCache", "key=p3/")) .add(new CacheOperation("Alluxio.writeCache", "key=p4/")) .add(new CacheOperation("Alluxio.writeCache", "key=p5/")) - .add(new CacheOperation("InputFile.lastModified", "key=p1/")) - .add(new CacheOperation("InputFile.lastModified", "key=p2/")) - .add(new CacheOperation("InputFile.lastModified", "key=p3/")) - .add(new CacheOperation("InputFile.lastModified", "key=p4/")) - .add(new CacheOperation("InputFile.lastModified", "key=p5/")) .build()); assertFileSystemAccesses( "SELECT * FROM test_cache_file_operations", @@ -126,11 +117,6 @@ public void testCacheFileOperations() .add(new CacheOperation("Alluxio.readCached", "key=p3/")) .add(new CacheOperation("Alluxio.readCached", "key=p4/")) .add(new CacheOperation("Alluxio.readCached", "key=p5/")) - .add(new CacheOperation("InputFile.lastModified", "key=p1/")) - .add(new CacheOperation("InputFile.lastModified", "key=p2/")) - .add(new CacheOperation("InputFile.lastModified", "key=p3/")) - .add(new CacheOperation("InputFile.lastModified", "key=p4/")) - .add(new CacheOperation("InputFile.lastModified", "key=p5/")) .build()); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java index f807a2e1a6a8..14b2dc78dc93 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveFileFormats.java @@ -1007,6 +1007,7 @@ private static void testPageSourceFactory( 0, fileSize, paddedFileSize, + 12345, splitProperties, TupleDomain.all(), TESTING_TYPE_MANAGER, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java index 70019c71a196..c7595047ed4f 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestOrcPageSourceMemoryTracking.java @@ -577,6 +577,7 @@ public ConnectorPageSource newPageSource(FileFormatDataSourceStats stats, Connec fileSplit.getStart(), fileSplit.getLength(), fileSplit.getLength(), + 12345, schema, TupleDomain.all(), TESTING_TYPE_MANAGER, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java index c36538dd220b..9a03efc52380 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPageSourceFactory.java @@ -256,6 +256,7 @@ private static List readFile( 0, fileSize, fileSize, + 12345, createSchema(), columnHandles, tupleDomain, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java index faaf64abc117..bca72e5a8b6c 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/orc/TestOrcPredicates.java @@ -18,6 +18,7 @@ import com.google.common.collect.ImmutableSet; import io.trino.filesystem.Location; import io.trino.filesystem.TrinoFileSystemFactory; +import io.trino.filesystem.TrinoInputFile; import io.trino.filesystem.memory.MemoryFileSystemFactory; import io.trino.metastore.HiveType; import io.trino.orc.OrcReaderOptions; @@ -160,7 +161,8 @@ private static ConnectorPageSource createPageSource( { OrcPageSourceFactory readerFactory = new OrcPageSourceFactory(new OrcReaderOptions(), fileSystemFactory, STATS, UTC); - long length = fileSystemFactory.create(session).newInputFile(location).length(); + TrinoInputFile inputFile = fileSystemFactory.create(session).newInputFile(location); + long length = inputFile.length(); List columnMappings = buildColumnMappings( "", ImmutableList.of(), @@ -180,6 +182,7 @@ private static ConnectorPageSource createPageSource( 0, length, length, + inputFile.lastModified().toEpochMilli(), getTableProperties(), effectivePredicate, TESTING_TYPE_MANAGER, diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java index 571b74c111a0..c1bba68fdd76 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/ParquetUtil.java @@ -95,6 +95,7 @@ private static ConnectorPageSource createPageSource(ConnectorSession session, Fi 0, parquetFile.length(), parquetFile.length(), + parquetFile.lastModified(), ImmutableMap.of(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde()), columns, domain,