Skip to content

Commit

Permalink
[improve](statistics)Record index row count update time. (#39788)
Browse files Browse the repository at this point in the history
Record index row count update time. So the planner could use it to
decide which row count to use, reported row count or analyzed row count.
  • Loading branch information
Jibing-Li authored Aug 23, 2024
1 parent 8fe11aa commit d71379c
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("table_name")
.add("index_name")
.add("row_count")
.add("update_time")
.build();

private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
Expand Down Expand Up @@ -266,10 +267,12 @@ public ShowResultSet constructIndexResultSet(TableStatsMeta tableStatistic) {
if (rowCount == -1) {
return new ShowResultSet(getMetaData(), result);
}
long updateTime = tableStatistic.getRowCountUpdateTime(olapTable.getIndexIdByName(indexName));
List<String> row = Lists.newArrayList();
row.add(table.getName());
row.add(indexName);
row.add(String.valueOf(rowCount));
row.add(String.valueOf(updateTime));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ public enum ScheduleType {
public final boolean enablePartition;

public final ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();
public final ConcurrentMap<Long, Long> indexesRowCountUpdateTime = new ConcurrentHashMap<>();

public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
Set<Pair<String, String>> jobColumns, Set<String> partitionNames, String colName, Long indexId,
Expand Down Expand Up @@ -357,4 +358,8 @@ public TableIf getTable() {
public void addIndexRowCount(long indexId, long rowCount) {
indexesRowCount.put(indexId, rowCount);
}

public void addIndexUpdateRowCountTime(long indexId, long time) {
indexesRowCountUpdateTime.put(indexId, time);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -498,13 +498,15 @@ protected void runQuery(String sql) {
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
long analyzeTimestamp = System.currentTimeMillis();
// Update index row count after analyze.
if (this instanceof OlapAnalysisTask) {
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
// For sync job, get jobInfo from job.jobInfo.
jobInfo = jobInfo == null ? job.jobInfo : jobInfo;
long indexId = info.indexId == -1 ? ((OlapTable) tbl).getBaseIndexId() : info.indexId;
jobInfo.addIndexRowCount(indexId, colStatsData.count);
jobInfo.addIndexUpdateRowCountTime(indexId, analyzeTimestamp);
}
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
queryId = DebugUtil.printId(stmtExecutor.getContext().queryId());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,9 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
objects.catalog.getId(), objects.db.getId(), objects.table.getId(), indexId, colName,
null, columnStatistic);
Env.getCurrentEnv().getStatisticsCache().syncColStats(data);
long timestamp = System.currentTimeMillis();
AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
.setTblUpdateTime(System.currentTimeMillis())
.setTblUpdateTime(timestamp)
.setColName("")
.setRowCount((long) Double.parseDouble(rowCount))
.setJobColumns(Sets.newHashSet())
Expand All @@ -391,6 +392,7 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt
if (objects.table instanceof OlapTable) {
indexId = indexId == -1 ? ((OlapTable) objects.table).getBaseIndexId() : indexId;
mockedJobInfo.addIndexRowCount(indexId, (long) Double.parseDouble(rowCount));
mockedJobInfo.addIndexUpdateRowCountTime(indexId, timestamp);
}
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
@SerializedName("irc")
public ConcurrentMap<Long, Long> indexesRowCount = new ConcurrentHashMap<>();

@SerializedName("ircut")
public ConcurrentMap<Long, Long> indexesRowCountUpdateTime = new ConcurrentHashMap<>();

@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
Expand Down Expand Up @@ -166,7 +169,8 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
indexesRowCount.putAll(analyzedJob.indexesRowCount);
clearStaleIndexRowCount((OlapTable) tableIf);
indexesRowCountUpdateTime.putAll(analyzedJob.indexesRowCountUpdateTime);
clearStaleIndexRowCountAndTime((OlapTable) tableIf);
}
rowCount = analyzedJob.rowCount;
if (rowCount == 0 && AnalysisMethod.SAMPLE.equals(analyzedJob.analysisMethod)) {
Expand Down Expand Up @@ -198,13 +202,20 @@ public void gsonPostProcess() throws IOException {
if (colToColStatsMeta == null) {
colToColStatsMeta = new ConcurrentHashMap<>();
}
if (indexesRowCountUpdateTime == null) {
indexesRowCountUpdateTime = new ConcurrentHashMap<>();
}
}

public long getRowCount(long indexId) {
return indexesRowCount.getOrDefault(indexId, -1L);
}

private void clearStaleIndexRowCount(OlapTable table) {
public long getRowCountUpdateTime(long indexId) {
return indexesRowCountUpdateTime.getOrDefault(indexId, 0L);
}

private void clearStaleIndexRowCountAndTime(OlapTable table) {
Iterator<Long> iterator = indexesRowCount.keySet().iterator();
List<Long> indexIds = table.getIndexIds();
while (iterator.hasNext()) {
Expand All @@ -213,6 +224,13 @@ private void clearStaleIndexRowCount(OlapTable table) {
iterator.remove();
}
}
iterator = indexesRowCountUpdateTime.keySet().iterator();
while (iterator.hasNext()) {
long key = iterator.next();
if (indexIds.contains(key)) {
iterator.remove();
}
}
}

public long getBaseIndexDeltaRowCount(OlapTable table) {
Expand Down
11 changes: 11 additions & 0 deletions regression-test/suites/statistics/test_analyze_mv.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ suite("test_analyze_mv") {
createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from mvTestDup group by key1, key2;")
sql """insert into mvTestDup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);"""

def timestamp = System.currentTimeMillis();
sql """analyze table mvTestDup with sync;"""

// Test show index row count
Expand All @@ -140,21 +141,25 @@ suite("test_analyze_mv") {
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("6", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)
result_row = sql """show index stats mvTestDup mv1"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("6", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)
result_row = sql """show index stats mvTestDup mv2"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv2", result_row[0][1])
assertEquals("6", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)
result_row = sql """show index stats mvTestDup mv3"""
assertEquals(1, result_row.size())
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("4", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)

// Compare show whole table column stats result with show single column.
def result_all = sql """show column stats mvTestDup"""
Expand Down Expand Up @@ -433,21 +438,27 @@ suite("test_analyze_mv") {
assertEquals("FULL", result_sample[0][9])

// Test alter table index row count.
timestamp = System.currentTimeMillis();
sql """alter table mvTestDup modify column `value2` set stats ('row_count'='1.5E8', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mvTestDup;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mvTestDup", result_row[0][1])
assertEquals("150000000", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)
timestamp = System.currentTimeMillis();
sql """alter table mvTestDup index mv1 modify column `mv_key1` set stats ('row_count'='3443', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv1;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv1", result_row[0][1])
assertEquals("3443", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)
timestamp = System.currentTimeMillis();
sql """alter table mvTestDup index mv3 modify column `mva_MAX__``value2``` set stats ('row_count'='234234', 'ndv'='3.0', 'num_nulls'='0.0', 'data_size'='1.5E8', 'min_value'='1', 'max_value'='10');"""
result_row = sql """show index stats mvTestDup mv3;"""
assertEquals("mvTestDup", result_row[0][0])
assertEquals("mv3", result_row[0][1])
assertEquals("234234", result_row[0][2])
assertTrue(Long.parseLong(result_row[0][3]) >= timestamp)

sql """drop stats mvTestDup"""
result_sample = sql """show column stats mvTestDup"""
Expand Down

0 comments on commit d71379c

Please sign in to comment.