Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Improvement](statistics) Add config for the threshold of column count for auto analyze. #27713

Merged
merged 1 commit into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,8 @@ public class SessionVariable implements Serializable, Writable {

// NOTE(review): presumably the variable name bound to the enableAutoAnalyze field; the
// annotation that would confirm this is cut off in this view.
public static final String ENABLE_AUTO_ANALYZE = "enable_auto_analyze";

// Variable name used by the @VariableMgr.VarAttr on autoAnalyzeTableWidthThreshold and
// passed to findConfigFromGlobalSessionVar by StatisticsUtil.getAutoAnalyzeTableWidthThreshold().
public static final String AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = "auto_analyze_table_width_threshold";

public static final String FASTER_FLOAT_CONVERT = "faster_float_convert";

public static final String ENABLE_DECIMAL256 = "enable_decimal256";
Expand Down Expand Up @@ -1315,6 +1317,13 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
flag = VariableMgr.GLOBAL)
public boolean enableAutoAnalyze = true;

/**
 * Maximum table width (column count) for auto analyze, registered under the
 * {@code auto_analyze_table_width_threshold} variable name with the GLOBAL flag.
 * Per the description below, a table with more columns than this value is not
 * auto analyzed. The initial value is 70.
 */
@VariableMgr.VarAttr(name = AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD,
description = {"参与自动收集的最大表宽度,列数多于这个参数的表不参与自动收集",
"Maximum table width to enable auto analyze, "
+ "table with more columns than this value will not be auto analyzed."},
flag = VariableMgr.GLOBAL)
public int autoAnalyzeTableWidthThreshold = 70;

@VariableMgr.VarAttr(name = AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat",
description = {"该参数定义自动ANALYZE例程的开始时间",
"This parameter defines the start time for the automatic ANALYZE routine."},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ public class StatisticConstants {

public static final int SUBMIT_JOB_LIMIT = 5;

// Fallback column-count threshold returned by StatisticsUtil.getAutoAnalyzeTableWidthThreshold()
// when the value cannot be read from the global session variable.
public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 70;

static {
SYSTEM_DBS.add(SystemInfoService.DEFAULT_CLUSTER
+ ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,13 @@ protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) {
TableIf table = StatisticsUtil
.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId);
// Skip tables that are too wide.
if (table.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) {
return null;
}

AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager();
TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId());

if (!table.needReAnalyzeTable(tblStats)) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,16 @@ public static int getAnalyzeTimeout() {
return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC;
}

/**
 * Resolves the column-count threshold above which a table is skipped by auto analyze.
 *
 * <p>The value is read from the global session variable
 * {@code auto_analyze_table_width_threshold}. If that lookup throws, the failure is
 * logged at WARN level and the compiled-in default from {@link StatisticConstants}
 * is returned instead.
 *
 * @return the configured threshold, or the default when the lookup fails
 */
public static int getAutoAnalyzeTableWidthThreshold() {
    // Start from the default so any lookup failure simply leaves it in place.
    int widthThreshold = StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
    try {
        widthThreshold =
                findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD)
                        .autoAnalyzeTableWidthThreshold;
    } catch (Exception lookupFailure) {
        LOG.warn("Failed to get value of auto_analyze_table_width_threshold, return default", lookupFailure);
    }
    return widthThreshold;
}

public static String encodeValue(ResultRow row, int index) {
if (row == null || row.getValues().size() <= index) {
return "NULL";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,37 @@ public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table,
Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2));
}

/**
 * Verifies that getReAnalyzeRequiredPart returns null when the table has more columns
 * than the auto-analyze width threshold, and a non-null result once the threshold is
 * raised above the column count.
 */
@Test
public void testSkipWideTable() {

    // Instance handed back by the mocked StatisticsUtil.findTable below.
    TableIf mockedTable = new OlapTable();

    // Every OlapTable reports a two-column base schema.
    new MockUp<OlapTable>() {
        @Mock
        public List<Column> getBaseSchema() {
            Column firstCol = new Column("col1", Type.INT);
            Column secondCol = new Column("col2", Type.INT);
            return Lists.newArrayList(firstCol, secondCol);
        }
    };

    new MockUp<StatisticsUtil>() {
        // Thresholds served on successive calls: 1 (below the column count), then 10 (above it).
        int[] widthLimits = {1, 10};
        int served = 0;

        @Mock
        public TableIf findTable(long catalogId, long dbId, long tblId) {
            return mockedTable;
        }

        @Mock
        public int getAutoAnalyzeTableWidthThreshold() {
            int position = served++;
            return widthLimits[position];
        }
    };

    AnalysisInfo jobInfo = new AnalysisInfoBuilder().build();
    StatisticsAutoCollector collector = new StatisticsAutoCollector();

    // First call sees threshold 1: two columns exceed it, so the table is skipped.
    Assertions.assertNull(collector.getReAnalyzeRequiredPart(jobInfo));
    // Second call sees threshold 10: the width check passes and a job is produced.
    Assertions.assertNotNull(collector.getReAnalyzeRequiredPart(jobInfo));
}

@Test
public void testLoop() {
AtomicBoolean timeChecked = new AtomicBoolean();
Expand Down