Skip to content

Commit

Permalink
[fix](planner) Fix select table tablet not effective (apache#25378)
Browse files Browse the repository at this point in the history
Fix select table tablet not effective, table distributed by random.
If tabletID specified in query does not exist in this partition, skip scan partition.
  • Loading branch information
xinyiZzz authored and 胥剑旭 committed Dec 14, 2023
1 parent dcb4d2b commit c589f43
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,8 @@ public void computeSampleTabletIds() {
continue;
}

// It is assumed here that all tablets row count is uniformly distributed
// TODO Use `p.getBaseIndex().getTablet(n).getRowCount()` to get each tablet row count to compute sample.
long avgRowsPerTablet = Math.max(p.getBaseIndex().getRowCount() / ids.size(), 1);
long tabletCounts = Math.max(
avgRowsPerPartition / avgRowsPerTablet + (avgRowsPerPartition % avgRowsPerTablet != 0 ? 1 : 0), 1);
Expand Down Expand Up @@ -1026,19 +1028,28 @@ private void computeTabletInfo() throws UserException {
final Partition partition = olapTable.getPartition(partitionId);
final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId);
final List<Tablet> tablets = Lists.newArrayList();
final Collection<Long> tabletIds = distributionPrune(selectedTable, partition.getDistributionInfo());
Collection<Long> tabletIds = distributionPrune(selectedTable, partition.getDistributionInfo());
LOG.debug("distribution prune tablets: {}", tabletIds);
if (tabletIds != null && sampleTabletIds.size() != 0) {
tabletIds.retainAll(sampleTabletIds);
if (sampleTabletIds.size() != 0) {
if (tabletIds != null) {
tabletIds.retainAll(sampleTabletIds);
} else {
tabletIds = sampleTabletIds;
}
LOG.debug("after sample tablets: {}", tabletIds);
}

List<Long> allTabletIds = selectedTable.getTabletIdsInOrder();
if (tabletIds != null) {
for (Long id : tabletIds) {
tablets.add(selectedTable.getTablet(id));
if (selectedTable.getTablet(id) != null) {
tablets.add(selectedTable.getTablet(id));
scanTabletIds.add(id);
} else {
// The tabletID specified in query does not exist in this partition, skip scan partition.
Preconditions.checkState(sampleTabletIds.size() != 0);
}
}
scanTabletIds.addAll(tabletIds);
} else {
tablets.addAll(selectedTable.getTablets());
scanTabletIds.addAll(allTabletIds);
Expand Down
137 changes: 134 additions & 3 deletions fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,29 @@ public static void setUp() throws Exception {
+ "\"in_memory\" = \"false\",\n"
+ "\"storage_format\" = \"V2\"\n"
+ ")";
String tbl3 = "CREATE TABLE db1.table3 (\n"
+ " `siteid` int(11) NULL DEFAULT \"10\" COMMENT \"\",\n"
+ " `citycode` smallint(6) NULL COMMENT \"\",\n"
+ " `username` varchar(32) NULL DEFAULT \"\" COMMENT \"\",\n"
+ " `pv` bigint(20) NULL DEFAULT \"0\" COMMENT \"\"\n"
+ ") ENGINE=OLAP\n"
+ "DUPLICATE KEY(`siteid`, `citycode`, `username`)\n"
+ "COMMENT \"OLAP\"\n"
+ "DISTRIBUTED BY RANDOM BUCKETS 10\n"
+ "PROPERTIES (\n"
+ "\"replication_num\" = \"1\",\n"
+ "\"in_memory\" = \"false\",\n"
+ "\"storage_format\" = \"V2\"\n"
+ ")";
dorisAssert = new DorisAssert();
dorisAssert.withDatabase("db1").useDatabase("db1");
dorisAssert.withTable(createTblStmtStr)
.withTable(createBaseAllStmtStr)
.withTable(createPratitionTableStr)
.withTable(createDatePartitionTableStr)
.withTable(tbl1)
.withTable(tbl2);
.withTable(tbl2)
.withTable(tbl3);
}

@Test
Expand Down Expand Up @@ -822,7 +837,7 @@ public void testSelectOuterJoinSql() throws Exception {
}

@Test
public void testSelectTablet() throws Exception {
public void testHashBucketSelectTablet() throws Exception {
String sql1 = "SELECT * FROM db1.table1 TABLET(10031,10032,10033)";
OriginalPlanner planner = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds = ((OlapScanNode) planner.getScanNodes().get(0)).getSampleTabletIds();
Expand All @@ -832,7 +847,17 @@ public void testSelectTablet() throws Exception {
}

@Test
public void testSelectSampleTable() throws Exception {
public void testRandomBucketSelectTablet() throws Exception {
String sql1 = "SELECT * FROM db1.table3 TABLET(10031,10032,10033)";
OriginalPlanner planner = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds = ((OlapScanNode) planner.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertTrue(sampleTabletIds.contains(10031L));
Assert.assertTrue(sampleTabletIds.contains(10032L));
Assert.assertTrue(sampleTabletIds.contains(10033L));
}

@Test
public void testSelectSampleHashBucketTable() throws Exception {
Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1");
OlapTable tbl = (OlapTable) db.getTableOrMetaException("table1");
long tabletId = 10031L;
Expand Down Expand Up @@ -936,6 +961,112 @@ public void testSelectSampleTable() throws Exception {
Assert.assertEquals(1, sampleTabletIds16.size());
}

@Test
public void testSelectSampleRandomBucketTable() throws Exception {
Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("default_cluster:db1");
OlapTable tbl = (OlapTable) db.getTableOrMetaException("table3");
long tabletId = 10031L;
for (Partition partition : tbl.getPartitions()) {
for (MaterializedIndex mIndex : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
mIndex.setRowCount(10000);
for (Tablet tablet : mIndex.getTablets()) {
tablet.setTabletId(tabletId);
tabletId += 1;
}
}
}

// 1. TABLESAMPLE ROWS
String sql1 = "SELECT * FROM db1.table3 TABLESAMPLE(10 ROWS)";
OriginalPlanner planner1 = (OriginalPlanner) dorisAssert.query(sql1).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds1 = ((OlapScanNode) planner1.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds1.size());

String sql2 = "SELECT * FROM db1.table3 TABLESAMPLE(1000 ROWS)";
OriginalPlanner planner2 = (OriginalPlanner) dorisAssert.query(sql2).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds2 = ((OlapScanNode) planner2.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds2.size());

String sql3 = "SELECT * FROM db1.table3 TABLESAMPLE(1001 ROWS)";
OriginalPlanner planner3 = (OriginalPlanner) dorisAssert.query(sql3).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds3 = ((OlapScanNode) planner3.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(2, sampleTabletIds3.size());

String sql4 = "SELECT * FROM db1.table3 TABLESAMPLE(9500 ROWS)";
OriginalPlanner planner4 = (OriginalPlanner) dorisAssert.query(sql4).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds4 = ((OlapScanNode) planner4.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(0, sampleTabletIds4.size()); // no sample, all tablet

String sql5 = "SELECT * FROM db1.table3 TABLESAMPLE(11000 ROWS)";
OriginalPlanner planner5 = (OriginalPlanner) dorisAssert.query(sql5).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds5 = ((OlapScanNode) planner5.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(0, sampleTabletIds5.size()); // no sample, all tablet

String sql6 = "SELECT * FROM db1.table3 TABLET(10033) TABLESAMPLE(900 ROWS)";
OriginalPlanner planner6 = (OriginalPlanner) dorisAssert.query(sql6).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds6 = ((OlapScanNode) planner6.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertTrue(sampleTabletIds6.size() >= 1 && sampleTabletIds6.size() <= 2);
Assert.assertTrue(sampleTabletIds6.contains(10033L));

// 2. TABLESAMPLE PERCENT
String sql7 = "SELECT * FROM db1.table3 TABLESAMPLE(10 PERCENT)";
OriginalPlanner planner7 = (OriginalPlanner) dorisAssert.query(sql7).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds7 = ((OlapScanNode) planner7.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds7.size());

String sql8 = "SELECT * FROM db1.table3 TABLESAMPLE(15 PERCENT)";
OriginalPlanner planner8 = (OriginalPlanner) dorisAssert.query(sql8).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds8 = ((OlapScanNode) planner8.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(2, sampleTabletIds8.size());

String sql9 = "SELECT * FROM db1.table3 TABLESAMPLE(100 PERCENT)";
OriginalPlanner planner9 = (OriginalPlanner) dorisAssert.query(sql9).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds9 = ((OlapScanNode) planner9.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(0, sampleTabletIds9.size());

String sql10 = "SELECT * FROM db1.table3 TABLESAMPLE(110 PERCENT)";
OriginalPlanner planner10 = (OriginalPlanner) dorisAssert.query(sql10).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds10 = ((OlapScanNode) planner10.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(0, sampleTabletIds10.size());

String sql11 = "SELECT * FROM db1.table3 TABLET(10033) TABLESAMPLE(5 PERCENT)";
OriginalPlanner planner11 = (OriginalPlanner) dorisAssert.query(sql11).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds11 = ((OlapScanNode) planner11.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertTrue(sampleTabletIds11.size() >= 1 && sampleTabletIds11.size() <= 2);
Assert.assertTrue(sampleTabletIds11.contains(10033L));

// 3. TABLESAMPLE REPEATABLE
String sql12 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS)";
OriginalPlanner planner12 = (OriginalPlanner) dorisAssert.query(sql12).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds12 = ((OlapScanNode) planner12.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds12.size());

String sql13 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 2";
OriginalPlanner planner13 = (OriginalPlanner) dorisAssert.query(sql13).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds13 = ((OlapScanNode) planner13.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds13.size());
Assert.assertTrue(sampleTabletIds13.contains(10033L));

String sql14 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 10";
OriginalPlanner planner14 = (OriginalPlanner) dorisAssert.query(sql14).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds14 = ((OlapScanNode) planner14.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds14.size());
Assert.assertTrue(sampleTabletIds14.contains(10031L));

String sql15 = "SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 0";
OriginalPlanner planner15 = (OriginalPlanner) dorisAssert.query(sql15).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds15 = ((OlapScanNode) planner15.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds15.size());
Assert.assertTrue(sampleTabletIds15.contains(10031L));

// 4. select returns 900 rows of results
String sql16 = "SELECT * FROM (SELECT * FROM db1.table3 TABLESAMPLE(900 ROWS) REPEATABLE 9999999 limit 900) t";
OriginalPlanner planner16 = (OriginalPlanner) dorisAssert.query(sql16).internalExecuteOneAndGetPlan();
Set<Long> sampleTabletIds16 = ((OlapScanNode) planner16.getScanNodes().get(0)).getSampleTabletIds();
Assert.assertEquals(1, sampleTabletIds16.size());
}


@Test
public void testSelectExcept() throws Exception {
ConnectContext ctx = UtFrameUtils.createDefaultCtx();
Expand Down

0 comments on commit c589f43

Please sign in to comment.