From d71379c6feed62afc4b6f92c794c3b18c20a9d6e Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 23 Aug 2024 11:58:44 +0800 Subject: [PATCH] [improve](statistics)Record index row count update time. (#39788) Record index row count update time. So the planner could use it to decide which row count to use, reported row count or analyzed row count. --- .../doris/analysis/ShowTableStatsStmt.java | 3 +++ .../apache/doris/statistics/AnalysisInfo.java | 5 +++++ .../doris/statistics/BaseAnalysisTask.java | 2 ++ .../statistics/StatisticsRepository.java | 4 +++- .../doris/statistics/TableStatsMeta.java | 22 +++++++++++++++++-- .../suites/statistics/test_analyze_mv.groovy | 11 ++++++++++ 6 files changed, 44 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java index 36a0e9a5872487..968574b750c5e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowTableStatsStmt.java @@ -75,6 +75,7 @@ public class ShowTableStatsStmt extends ShowStmt { .add("table_name") .add("index_name") .add("row_count") + .add("update_time") .build(); private static final ImmutableList COLUMN_PARTITION_TITLE_NAMES = @@ -266,10 +267,12 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) { if (rowCount == -1) { return new ShowResultSet(getMetaData(), result); } + long updateTime = tableStatistic.getRowCountUpdateTime(olapTable.getIndexIdByName(indexName)); List row = Lists.newArrayList(); row.add(table.getName()); row.add(indexName); row.add(String.valueOf(rowCount)); + row.add(String.valueOf(updateTime)); result.add(row); return new ShowResultSet(getMetaData(), result); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 6ec413821ea37b..0e89b5225aba03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -198,6 +198,7 @@ public enum ScheduleType { public final boolean enablePartition; public final ConcurrentMap indexesRowCount = new ConcurrentHashMap<>(); + public final ConcurrentMap indexesRowCountUpdateTime = new ConcurrentHashMap<>(); public AnalysisInfo(long jobId, long taskId, List taskIds, long catalogId, long dbId, long tblId, Set> jobColumns, Set partitionNames, String colName, Long indexId, @@ -357,4 +358,8 @@ public TableIf getTable() { public void addIndexRowCount(long indexId, long rowCount) { indexesRowCount.put(indexId, rowCount); } + + public void addIndexUpdateRowCountTime(long indexId, long time) { + indexesRowCountUpdateTime.put(indexId, time); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 329231f360487c..a16c0275da23ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -498,6 +498,7 @@ protected void runQuery(String sql) { try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) { stmtExecutor = new StmtExecutor(a.connectContext, sql); ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0)); + long analyzeTimestamp = System.currentTimeMillis(); // Update index row count after analyze. if (this instanceof OlapAnalysisTask) { AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId); @@ -505,6 +506,7 @@ protected void runQuery(String sql) { jobInfo = jobInfo == null ? job.jobInfo : jobInfo; long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId; jobInfo.addIndexRowCount(indexId, colStatsData.count); + jobInfo.addIndexUpdateRowCountTime(indexId, analyzeTimestamp); } Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData); queryId = DebugUtil.printId(stmtExecutor.getContext().queryId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 76fb22a60e4321..83350d16e0423a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -380,8 +380,9 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt objects.catalog.getId(), objects.db.getId(), objects.table.getId(), indexId, colName, null, columnStatistic); Env.getCurrentEnv().getStatisticsCache().syncColStats(data); + long timestamp = System.currentTimeMillis(); AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder() - .setTblUpdateTime(System.currentTimeMillis()) + .setTblUpdateTime(timestamp) .setColName("") .setRowCount((long) Double.parseDouble(rowCount)) .setJobColumns(Sets.newHashSet()) @@ -391,6 +392,7 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt if (objects.table instanceof OlapTable) { indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId; mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount)); + mockedJobInfo.addIndexUpdateRowCountTime(indexId, timestamp); } Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 15509525c34ad1..97833041922a66 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -88,6 +88,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable { @SerializedName("irc") public ConcurrentMap indexesRowCount = new ConcurrentHashMap<>(); + @SerializedName("ircut") + public ConcurrentMap indexesRowCountUpdateTime = new ConcurrentHashMap<>(); + @VisibleForTesting public TableStatsMeta() { tblId = 0; @@ -166,7 +169,8 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { if (tableIf != null) { if (tableIf instanceof OlapTable) { indexesRowCount.putAll(analyzedJob.indexesRowCount); - clearStaleIndexRowCount((OlapTable) tableIf); + indexesRowCountUpdateTime.putAll(analyzedJob.indexesRowCountUpdateTime); + clearStaleIndexRowCountAndTime((OlapTable) tableIf); } rowCount = analyzedJob.rowCount; if (rowCount == 0 && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) { @@ -198,13 +202,20 @@ public void gsonPostProcess() throws IOException { if (colToColStatsMeta == null) { colToColStatsMeta = new ConcurrentHashMap<>(); } + if (indexesRowCountUpdateTime == null) { + indexesRowCountUpdateTime = new ConcurrentHashMap<>(); + } } public long getRowCount(long indexId) { return indexesRowCount.getOrDefault(indexId, -1L); } - private void clearStaleIndexRowCount(OlapTable table) { + public long getRowCountUpdateTime(long indexId) { + return indexesRowCountUpdateTime.getOrDefault(indexId, 0L); + } + + private void clearStaleIndexRowCountAndTime(OlapTable table) { Iterator iterator = indexesRowCount.keySet().iterator(); List indexIds = table.getIndexIds(); while (iterator.hasNext()) { @@ -213,6 +224,13 @@ private void clearStaleIndexRowCount(OlapTable table) { iterator.remove(); } } + iterator = indexesRowCountUpdateTime.keySet().iterator(); + while (iterator.hasNext()) { + long key = iterator.next(); + if (indexIds.contains(key)) { + iterator.remove(); + } + } } public long getBaseIndexDeltaRowCount(OlapTable table) { diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 44a4bbc5aa8507..4b9644c4821d96 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -132,6 +132,7 @@ suite("test_analyze_mv") { createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from mvTestDup group by key1, key2;") sql """insert into mvTestDup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + def timestamp = System.currentTimeMillis(); sql """analyze table mvTestDup with sync;""" // Test show index row count @@ -140,21 +141,25 @@ suite("test_analyze_mv") { assertEquals("mvTestDup", result_row[0][0]) assertEquals("mvTestDup", result_row[0][1]) assertEquals("6", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) result_row = sql """show index stats mvTestDup mv1""" assertEquals(1, result_row.size()) assertEquals("mvTestDup", result_row[0][0]) assertEquals("mv1", result_row[0][1]) assertEquals("6", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) result_row = sql """show index stats mvTestDup mv2""" assertEquals(1, result_row.size()) assertEquals("mvTestDup", result_row[0][0]) assertEquals("mv2", result_row[0][1]) assertEquals("6", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) result_row = sql """show index stats mvTestDup mv3""" assertEquals(1, result_row.size()) assertEquals("mvTestDup", result_row[0][0]) assertEquals("mv3", result_row[0][1]) assertEquals("4", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) // Compare show whole table column stats result with show single column. def result_all = sql """show column stats mvTestDup""" @@ -433,21 +438,27 @@ suite("test_analyze_mv") { assertEquals("FULL", result_sample[0][9]) // Test alter table index row count. + timestamp = System.currentTimeMillis(); sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" result_row = sql """show index stats mvTestDup mvTestDup;""" assertEquals("mvTestDup", result_row[0][0]) assertEquals("mvTestDup", result_row[0][1]) assertEquals("150000000", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) + timestamp = System.currentTimeMillis(); sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" result_row = sql """show index stats mvTestDup mv1;""" assertEquals("mvTestDup", result_row[0][0]) assertEquals("mv1", result_row[0][1]) assertEquals("3443", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) + timestamp = System.currentTimeMillis(); sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');""" result_row = sql """show index stats mvTestDup mv3;""" assertEquals("mvTestDup", result_row[0][0]) assertEquals("mv3", result_row[0][1]) assertEquals("234234", result_row[0][2]) + assertTrue(Long.parseLong(result_row[0][3]) >= timestamp) sql """drop stats mvTestDup""" result_sample = sql """show column stats mvTestDup"""