From c460fe747750e30329ed12818753fe973d943669 Mon Sep 17 00:00:00 2001
From: Letian Jiang
Date: Thu, 9 Mar 2023 19:21:59 +0800
Subject: [PATCH] [Enhancement] Get partitions by batch of size 5k per RPC (#19241)

---
 .../src/main/java/com/starrocks/common/Config.java | 2 +-
 .../com/starrocks/connector/hive/HiveMetastore.java | 12 ++++++++++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/fe/fe-core/src/main/java/com/starrocks/common/Config.java b/fe/fe-core/src/main/java/com/starrocks/common/Config.java
index 1f2b52c491a3e..d378bdb7bc936 100644
--- a/fe/fe-core/src/main/java/com/starrocks/common/Config.java
+++ b/fe/fe-core/src/main/java/com/starrocks/common/Config.java
@@ -1618,7 +1618,7 @@ public class Config extends ConfigBase {
      * The maximum number of partitions to fetch from the metastore in one RPC.
      */
     @ConfField
-    public static int max_hive_partitions_per_rpc = 1000;
+    public static int max_hive_partitions_per_rpc = 5000;
 
     /**
      * The interval of lazy refreshing remote file's metadata cache
diff --git a/fe/fe-core/src/main/java/com/starrocks/connector/hive/HiveMetastore.java b/fe/fe-core/src/main/java/com/starrocks/connector/hive/HiveMetastore.java
index 6690b3ff21a19..db7bebf609c45 100644
--- a/fe/fe-core/src/main/java/com/starrocks/connector/hive/HiveMetastore.java
+++ b/fe/fe-core/src/main/java/com/starrocks/connector/hive/HiveMetastore.java
@@ -30,6 +30,7 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.function.Function;
@@ -101,8 +102,15 @@ public Partition getPartition(String dbName, String tblName, List partit
     }
 
     public Map getPartitionsByNames(String dbName, String tblName, List partitionNames) {
-        List partitions =
-                client.getPartitionsByNames(dbName, tblName, partitionNames);
+        List partitions = new ArrayList<>();
+        // fetch partitions by batch per RPC
+        for (int start = 0; start < partitionNames.size(); start += Config.max_hive_partitions_per_rpc) {
+            int end = Math.min(start + Config.max_hive_partitions_per_rpc, partitionNames.size());
+            List namesPerRPC = partitionNames.subList(start, end);
+            List partsPerRPC =
+                    client.getPartitionsByNames(dbName, tblName, namesPerRPC);
+            partitions.addAll(partsPerRPC);
+        }
 
         Map> partitionNameToPartitionValues = partitionNames.stream()
                 .collect(Collectors.toMap(Function.identity(), PartitionUtil::toPartitionValues));