Skip to content

Commit

Permalink
Add max-partitions-for-eager-load property
Browse files Browse the repository at this point in the history
This lets the hive.max-partitions-per-scan property act only as a limit
  • Loading branch information
arhimondr committed Oct 7, 2022
1 parent 911d7ca commit f76fa46
Show file tree
Hide file tree
Showing 24 changed files with 51 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public class HiveConfig

private DataSize maxSplitSize = DataSize.of(64, MEGABYTE);
private int maxPartitionsPerScan = 100_000;
private int maxPartitionsForEagerLoad = 100_000;
private int maxOutstandingSplits = 1_000;
private DataSize maxOutstandingSplitsSize = DataSize.of(256, MEGABYTE);
private int maxSplitIteratorThreads = 1_000;
Expand Down Expand Up @@ -401,6 +402,26 @@ public HiveConfig setMaxPartitionsPerScan(int maxPartitionsPerScan)
return this;
}

/**
 * Returns the currently configured value of {@code maxPartitionsForEagerLoad}.
 * The {@code @Min(1)} constraint on this getter declares that the value must be at least 1.
 */
@Min(1)
public int getMaxPartitionsForEagerLoad()
{
    return this.maxPartitionsForEagerLoad;
}

/**
 * Stores the value for the {@code hive.max-partitions-for-eager-load} property.
 *
 * @param maxPartitionsForEagerLoad the new value to store in this config
 * @return this {@code HiveConfig}, so setter calls can be chained
 */
@Config("hive.max-partitions-for-eager-load")
@ConfigDescription("Maximum allowed partitions for a single table scan to be loaded eagerly on coordinator. Certain optimizations are not possible without eager loading.")
public HiveConfig setMaxPartitionsForEagerLoad(int maxPartitionsForEagerLoad)
{
    this.maxPartitionsForEagerLoad = maxPartitionsForEagerLoad;
    return this;
}

/**
 * Cross-field consistency check between the two partition-count settings.
 *
 * @return {@code true} when {@code maxPartitionsPerScan} is at least
 *         {@code maxPartitionsForEagerLoad}; {@code false} otherwise, in which case the
 *         {@code @AssertTrue} message is presumably what the validator reports
 */
@AssertTrue(message = "The value of hive.max-partitions-for-eager-load is expected to be less than or equal to hive.max-partitions-per-scan")
public boolean isMaxPartitionsForEagerLoadValid()
{
    // Same comparison as "eagerLoad <= perScan", written from the per-scan side.
    return maxPartitionsPerScan >= maxPartitionsForEagerLoad;
}

@Min(1)
public int getMaxOutstandingSplits()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,23 +49,23 @@

public class HivePartitionManager
{
private final int maxPartitions;
private final int maxPartitionsForEagerLoad;
private final int domainCompactionThreshold;

@Inject
public HivePartitionManager(HiveConfig hiveConfig)
{
this(
hiveConfig.getMaxPartitionsPerScan(),
hiveConfig.getMaxPartitionsForEagerLoad(),
hiveConfig.getDomainCompactionThreshold());
}

public HivePartitionManager(
int maxPartitions,
int maxPartitionsForEagerLoad,
int domainCompactionThreshold)
{
checkArgument(maxPartitions >= 1, "maxPartitions must be at least 1");
this.maxPartitions = maxPartitions;
checkArgument(maxPartitionsForEagerLoad >= 1, "maxPartitionsForEagerLoad must be at least 1");
this.maxPartitionsForEagerLoad = maxPartitionsForEagerLoad;
checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
this.domainCompactionThreshold = domainCompactionThreshold;
}
Expand Down Expand Up @@ -201,7 +201,7 @@ public Iterator<HivePartition> getPartitions(SemiTransactionalHiveMetastore meta
public boolean canPartitionsBeLoaded(HivePartitionResult partitionResult)
{
if (partitionResult.getPartitionNames().isPresent()) {
return partitionResult.getPartitionNames().orElseThrow().size() <= maxPartitions;
return partitionResult.getPartitionNames().orElseThrow().size() <= maxPartitionsForEagerLoad;
}
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ public DistributedQueryRunner build()
hiveProperties.put("hive.parquet.time-zone", TIME_ZONE.getID());
}
hiveProperties.put("hive.max-partitions-per-scan", "1000");
hiveProperties.put("hive.max-partitions-for-eager-load", "1000");
hiveProperties.put("hive.security", security);
hiveProperties.putAll(this.hiveProperties.buildOrThrow());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public void testDefaults()
.setSingleStatementWritesOnly(false)
.setMaxSplitSize(DataSize.of(64, Unit.MEGABYTE))
.setMaxPartitionsPerScan(100_000)
.setMaxPartitionsForEagerLoad(100_000)
.setMaxOutstandingSplits(1_000)
.setMaxOutstandingSplitsSize(DataSize.of(256, Unit.MEGABYTE))
.setMaxSplitIteratorThreads(1_000)
Expand Down Expand Up @@ -126,6 +127,7 @@ public void testExplicitPropertyMappings()
.put("hive.single-statement-writes", "true")
.put("hive.max-split-size", "256MB")
.put("hive.max-partitions-per-scan", "123")
.put("hive.max-partitions-for-eager-load", "122")
.put("hive.max-outstanding-splits", "10")
.put("hive.max-outstanding-splits-size", "32MB")
.put("hive.max-split-iterator-threads", "10")
Expand Down Expand Up @@ -209,6 +211,7 @@ public void testExplicitPropertyMappings()
.setSingleStatementWritesOnly(true)
.setMaxSplitSize(DataSize.of(256, Unit.MEGABYTE))
.setMaxPartitionsPerScan(123)
.setMaxPartitionsForEagerLoad(122)
.setMaxOutstandingSplits(10)
.setMaxOutstandingSplitsSize(DataSize.of(32, Unit.MEGABYTE))
.setMaxSplitIteratorThreads(10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import io.trino.plugin.hive.metastore.HiveMetastoreConfig;
import io.trino.plugin.hive.metastore.file.FileHiveMetastore;
import io.trino.plugin.hive.metastore.file.FileHiveMetastoreConfig;
import io.trino.spi.TrinoException;
import io.trino.spi.security.PrincipalType;
import io.trino.sql.planner.OptimizerConfig.JoinDistributionType;
import io.trino.sql.planner.OptimizerConfig.JoinReorderingStrategy;
Expand Down Expand Up @@ -64,7 +63,6 @@
import static io.trino.sql.planner.plan.ExchangeNode.Type.REPLICATE;
import static io.trino.sql.planner.plan.JoinNode.Type.INNER;
import static io.trino.testing.TestingSession.testSessionBuilder;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

public class TestHivePlans
extends BasePlanTest
Expand Down Expand Up @@ -113,7 +111,7 @@ protected LocalQueryRunner createLocalQueryRunner()
protected LocalQueryRunner createQueryRunner(Session session, HiveMetastore metastore)
{
LocalQueryRunner queryRunner = LocalQueryRunner.create(session);
queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), Map.of("hive.max-partitions-per-scan", "5"));
queryRunner.createCatalog(HIVE_CATALOG_NAME, new TestingHiveConnectorFactory(metastore), Map.of("hive.max-partitions-for-eager-load", "5"));
return queryRunner;
}

Expand Down Expand Up @@ -308,11 +306,6 @@ public void testQueryScanningForTooManyPartitions()
exchange(REMOTE, REPLICATE,
project(
tableScan("table_unpartitioned", Map.of("R_STR_COL", "str_col", "R_INT_COL", "int_col"))))))));

// The partitions will be loaded during split creation, so it fails during execution.
assertThatThrownBy(() -> getQueryRunner().execute(query))
.isInstanceOf(TrinoException.class)
.hasMessage("Query over table 'test_schema.table_int_with_too_many_partitions' can potentially read more than 5 partitions");
}

// Disable join ordering so that expected plans are well defined.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ hive.allow-register-partition-procedure=true
hive.metastore-cache-ttl=0s
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.hive-views.enabled=true
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,6 @@ hive.allow-register-partition-procedure=true
hive.metastore-cache-ttl=0s
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.hive-views.enabled=true
hive.non-managed-table-writes-enabled=true
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ hive.hdfs.trino.principal=hdfs/[email protected]
hive.hdfs.trino.keytab=/etc/hadoop/conf/hdfs.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ hive.hdfs.authentication.type=NONE
hive.hdfs.impersonation.enabled=true
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ hive.hdfs.impersonation.enabled=true
hive.fs.new-directory-permissions=0700
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ hive.hdfs.trino.principal=hdfs/[email protected]
hive.hdfs.trino.keytab=/etc/hadoop/conf/hdfs-other.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ hive.hdfs.trino.principal=hdfs/[email protected]
hive.hdfs.trino.keytab=/etc/hadoop/conf/hdfs.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ hive.hdfs.trino.credential-cache.location=/etc/trino/conf/presto-server-krbcc
#hive.fs.new-directory-permissions=0700
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
#hive.fs.new-directory-permissions=0700
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ hive.hdfs.trino.principal=hdfs/[email protected]
hive.hdfs.trino.credential-cache.location=/etc/trino/conf/hdfs-krbcc
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.credential-cache.location=/etc/trino/conf/presto-server-krbcc
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.config.resources=/etc/hadoop/conf/core-site.xml,/etc/hadoop-kms/conf/hive-disable-key-provider-cache-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.config.resources=/etc/hadoop/conf/core-site.xml,/etc/hadoop-kms/conf/hive-disable-key-provider-cache-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.credential-cache.location=/etc/trino/conf/presto-server-krbcc
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.config.resources=/etc/hadoop/conf/core-site.xml,/etc/hadoop-kms/conf/hive-disable-key-provider-cache-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100
hive.config.resources=/etc/hadoop/conf/core-site.xml,/etc/hadoop-kms/conf/hive-disable-key-provider-cache-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/other-presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ hive.hdfs.trino.principal=presto-server/[email protected]
hive.hdfs.trino.keytab=/etc/trino/conf/presto-server.keytab
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

#required for testGrantRevoke() product test
hive.security=sql-standard
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ hive.allow-rename-table=true
hive.metastore-cache-ttl=0s
hive.fs.cache.max-size=10
hive.max-partitions-per-scan=100
hive.max-partitions-for-eager-load=100

0 comments on commit f76fa46

Please sign in to comment.