From c589f43377a739556e6362fc1e27e972af54d9a8 Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Wed, 25 Oct 2023 18:02:27 +0800 Subject: [PATCH] [fix](planner) Fix `select table tablet` not effective (#25378) Fix select table tablet not effective, table distributed by random. If tabletID specified in query does not exist in this partition, skip scan partition. --- .../apache/doris/planner/OlapScanNode.java | 21 ++- .../apache/doris/analysis/SelectStmtTest.java | 137 +++++++++++++++++- 2 files changed, 150 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index bb10d762081947..cc00cfd87709cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -970,6 +970,8 @@ public void computeSampleTabletIds() { continue; } + // It is assumed here that all tablets row count is uniformly distributed + // TODO Use `p.getBaseIndex().getTablet(n).getRowCount()` to get each tablet row count to compute sample. long avgRowsPerTablet = Math.max(p.getBaseIndex().getRowCount() / ids.size(), 1); long tabletCounts = Math.max( avgRowsPerPartition / avgRowsPerTablet + (avgRowsPerPartition % avgRowsPerTablet != 0 ? 1 : 0), 1); @@ -1026,19 +1028,28 @@ private void computeTabletInfo() throws UserException { final Partition partition = olapTable.getPartition(partitionId); final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId); final List tablets = Lists.newArrayList(); - final Collection tabletIds = distributionPrune(selectedTable, partition.getDistributionInfo()); + Collection tabletIds = distributionPrune(selectedTable, partition.getDistributionInfo()); LOG.debug("distribution prune tablets: {}", tabletIds); - if (tabletIds != null && sampleTabletIds.size() != 0) { - tabletIds.retainAll(sampleTabletIds); + if (sampleTabletIds.size() != 0) { + if (tabletIds != null) { + tabletIds.retainAll(sampleTabletIds); + } else { + tabletIds = sampleTabletIds; + } LOG.debug("after sample tablets: {}", tabletIds); } List allTabletIds = selectedTable.getTabletIdsInOrder(); if (tabletIds != null) { for (Long id : tabletIds) { - tablets.add(selectedTable.getTablet(id)); + if (selectedTable.getTablet(id) != null) { + tablets.add(selectedTable.getTablet(id)); + scanTabletIds.add(id); + } else { + // The tabletID specified in query does not exist in this partition, skip scan partition. + Preconditions.checkState(sampleTabletIds.size() != 0); + } } - scanTabletIds.addAll(tabletIds); } else { tablets.addAll(selectedTable.getTablets()); scanTabletIds.addAll(allTabletIds); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 29aa6d66445ef2..f65843f5116063 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -131,6 +131,20 @@ public static void setUp() throws Exception { + "\"in_memory\" = \"false\",\n" + "\"storage_format\" = \"V2\"\n" + ")"; + String tbl3 = "CREATE TABLE db1.table3 (\n" + + " `siteid` int(11) NULL DEFAULT \"10\" COMMENT \"\",\n" + + " `citycode` smallint(6) NULL COMMENT \"\",\n" + + " `username` varchar(32) NULL DEFAULT \"\" COMMENT \"\",\n" + + " `pv` bigint(20) NULL DEFAULT \"0\" COMMENT \"\"\n" + + ") ENGINE=OLAP\n" + + "DUPLICATE KEY(`siteid`, `citycode`, `username`)\n" + + "COMMENT \"OLAP\"\n" + + "DISTRIBUTED BY RANDOM BUCKETS 10\n" + + "PROPERTIES (\n" + + "\"replication_num\" = \"1\",\n" + + "\"in_memory\" = \"false\",\n" + + "\"storage_format\" = \"V2\"\n" + + ")"; dorisAssert = new DorisAssert(); dorisAssert.withDatabase("db1").useDatabase("db1"); dorisAssert.withTable(createTblStmtStr) @@ -138,7 +152,8 @@ public static void setUp() throws Exception { .withTable(createPratitionTableStr) .withTable(createDatePartitionTableStr) .withTable(tbl1) - .withTable(tbl2); + .withTable(tbl2) + .withTable(tbl3); } @Test @@ -822,7 +837,7 @@ public void testSelectOuterJoinSql() throws Exception { } @Test - public void testSelectTablet() throws Exception { + public void testHashBucketSelectTablet() throws Exception { String sql1 = "SELECT * FROM db1.table1 TABLET(10031,10032,10033)"; OriginalPlanner planner = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan(); Set sampleTabletIds = ((OlapScanNode) planner.getScanNodes().get(0)).getSampleTabletIds(); @@ -832,7 +847,17 @@ public void testSelectTablet() throws Exception { } @Test - public void testSelectSampleTable() throws Exception { + public void testRandomBucketSelectTablet() throws Exception { + String sql1 = "SELECT * FROM db1.table3 TABLET(10031,10032,10033)"; + OriginalPlanner planner = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan(); + Set sampleTabletIds = ((OlapScanNode) planner.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertTrue(sampleTabletIds.contains(10031L)); + Assert.assertTrue(sampleTabletIds.contains(10032L)); + Assert.assertTrue(sampleTabletIds.contains(10033L)); + } + + @Test + public void testSelectSampleHashBucketTable() throws Exception { Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1"); OlapTable tbl = (OlapTable) db.getTableOrMetaException("table1"); long tabletId = 10031L; @@ -936,6 +961,112 @@ public void testSelectSampleTable() throws Exception { Assert.assertEquals(1, sampleTabletIds16.size()); } + @Test + public void testSelectSampleRandomBucketTable() throws Exception { + Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1"); + OlapTable tbl = (OlapTable) db.getTableOrMetaException("table3"); + long tabletId = 10031L; + for (Partition partition : tbl.getPartitions()) { + for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) { + mIndex.setRowCount(10000); + for (Tablet tablet : mIndex.getTablets()) { + tablet.setTabletId(tabletId); + tabletId += 1; + } + } + } + + // 1. TABLESAMPLE ROWS + String sql1 = "SELECT * FROM db1.table3 TABLESAMPLE(10 ROWS)"; + OriginalPlanner planner1 = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan(); + Set sampleTabletIds1 = ((OlapScanNode) planner1.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds1.size()); + + String sql2 = "SELECT * FROM db1.table3 TABLESAMPLE(1000 ROWS)"; + OriginalPlanner planner2 = (OriginalPlanner) dorisAssert.query(sql2).internalExecuteOneAndGetPlan(); + Set sampleTabletIds2 = ((OlapScanNode) planner2.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds2.size()); + + String sql3 = "SELECT * FROM db1.table3 TABLESAMPLE(1001 ROWS)"; + OriginalPlanner planner3 = (OriginalPlanner) dorisAssert.query(sql3).internalExecuteOneAndGetPlan(); + Set sampleTabletIds3 = ((OlapScanNode) planner3.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(2, sampleTabletIds3.size()); + + String sql4 = "SELECT * FROM db1.table3 TABLESAMPLE(9500 ROWS)"; + OriginalPlanner planner4 = (OriginalPlanner) dorisAssert.query(sql4).internalExecuteOneAndGetPlan(); + Set sampleTabletIds4 = ((OlapScanNode) planner4.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(0, sampleTabletIds4.size()); // no sample, all tablet + + String sql5 = "SELECT * FROM db1.table3 TABLESAMPLE(11000 ROWS)"; + OriginalPlanner planner5 = (OriginalPlanner) dorisAssert.query(sql5).internalExecuteOneAndGetPlan(); + Set sampleTabletIds5 = ((OlapScanNode) planner5.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(0, sampleTabletIds5.size()); // no sample, all tablet + + String sql6 = "SELECT * FROM db1.table3 TABLET(10033) TABLESAMPLE(900 ROWS)"; + OriginalPlanner planner6 = (OriginalPlanner) dorisAssert.query(sql6).internalExecuteOneAndGetPlan(); + Set sampleTabletIds6 = ((OlapScanNode) planner6.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertTrue(sampleTabletIds6.size() >= 1 && sampleTabletIds6.size() <= 2); + Assert.assertTrue(sampleTabletIds6.contains(10033L)); + + // 2. TABLESAMPLE PERCENT + String sql7 = "SELECT * FROM db1.table3 TABLESAMPLE(10 PERCENT)"; + OriginalPlanner planner7 = (OriginalPlanner) dorisAssert.query(sql7).internalExecuteOneAndGetPlan(); + Set sampleTabletIds7 = ((OlapScanNode) planner7.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds7.size()); + + String sql8 = "SELECT * FROM db1.table3 TABLESAMPLE(15 PERCENT)"; + OriginalPlanner planner8 = (OriginalPlanner) dorisAssert.query(sql8).internalExecuteOneAndGetPlan(); + Set sampleTabletIds8 = ((OlapScanNode) planner8.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(2, sampleTabletIds8.size()); + + String sql9 = "SELECT * FROM db1.table3 TABLESAMPLE(100 PERCENT)"; + OriginalPlanner planner9 = (OriginalPlanner) dorisAssert.query(sql9).internalExecuteOneAndGetPlan(); + Set sampleTabletIds9 = ((OlapScanNode) planner9.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(0, sampleTabletIds9.size()); + + String sql10 = "SELECT * FROM db1.table3 TABLESAMPLE(110 PERCENT)"; + OriginalPlanner planner10 = (OriginalPlanner) dorisAssert.query(sql10).internalExecuteOneAndGetPlan(); + Set sampleTabletIds10 = ((OlapScanNode) planner10.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(0, sampleTabletIds10.size()); + + String sql11 = "SELECT * FROM db1.table3 TABLET(10033) TABLESAMPLE(5 PERCENT)"; + OriginalPlanner planner11 = (OriginalPlanner) dorisAssert.query(sql11).internalExecuteOneAndGetPlan(); + Set sampleTabletIds11 = ((OlapScanNode) planner11.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertTrue(sampleTabletIds11.size() >= 1 && sampleTabletIds11.size() <= 2); + Assert.assertTrue(sampleTabletIds11.contains(10033L)); + + // 3. TABLESAMPLE REPEATABLE + String sql12 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS)"; + OriginalPlanner planner12 = (OriginalPlanner) dorisAssert.query(sql12).internalExecuteOneAndGetPlan(); + Set sampleTabletIds12 = ((OlapScanNode) planner12.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds12.size()); + + String sql13 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 2"; + OriginalPlanner planner13 = (OriginalPlanner) dorisAssert.query(sql13).internalExecuteOneAndGetPlan(); + Set sampleTabletIds13 = ((OlapScanNode) planner13.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds13.size()); + Assert.assertTrue(sampleTabletIds13.contains(10033L)); + + String sql14 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 10"; + OriginalPlanner planner14 = (OriginalPlanner) dorisAssert.query(sql14).internalExecuteOneAndGetPlan(); + Set sampleTabletIds14 = ((OlapScanNode) planner14.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds14.size()); + Assert.assertTrue(sampleTabletIds14.contains(10031L)); + + String sql15 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 0"; + OriginalPlanner planner15 = (OriginalPlanner) dorisAssert.query(sql15).internalExecuteOneAndGetPlan(); + Set sampleTabletIds15 = ((OlapScanNode) planner15.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds15.size()); + Assert.assertTrue(sampleTabletIds15.contains(10031L)); + + // 4. select returns 900 rows of results + String sql16 = "SELECT * FROM (SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 9999999 limit 900) t"; + OriginalPlanner planner16 = (OriginalPlanner) dorisAssert.query(sql16).internalExecuteOneAndGetPlan(); + Set sampleTabletIds16 = ((OlapScanNode) planner16.getScanNodes().get(0)).getSampleTabletIds(); + Assert.assertEquals(1, sampleTabletIds16.size()); + } + + @Test public void testSelectExcept() throws Exception { ConnectContext ctx = UtFrameUtils.createDefaultCtx();