update docs

session variable

update docs

update docs

ut

ut

add more test

remove `enable_auto_sample`

core function finished
Kikyou1997 committed Nov 13, 2023
1 parent 2c0f33a commit ccbb95f
Showing 30 changed files with 1,429 additions and 846 deletions.
34 changes: 2 additions & 32 deletions fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -19,8 +19,6 @@

import org.apache.doris.common.ExperimentalUtil.ExperimentalType;

import java.util.concurrent.TimeUnit;

public class Config extends ConfigBase {

@ConfField(description = {"用户自定义配置文件的路径,用于存放 fe_custom.conf。该文件中的配置会覆盖 fe.conf 中的配置",
@@ -1742,7 +1740,7 @@ public class Config extends ConfigBase {
* Used to determine how many statistics collection SQL statements could run simultaneously.
*/
@ConfField
public static int statistics_simultaneously_running_task_num = 10;
public static int statistics_simultaneously_running_task_num = 3;

/**
* if table has too many replicas, Fe occur oom when schema change.
@@ -2043,7 +2041,7 @@ public class Config extends ConfigBase {
* FE OOM.
*/
@ConfField
public static long stats_cache_size = 10_0000;
public static long stats_cache_size = 50_0000;

/**
* This configuration is used to enable the statistics of query information, which will record
@@ -2066,9 +2064,6 @@ public class Config extends ConfigBase {
"Whether to enable binlog feature"})
public static boolean enable_feature_binlog = false;

@ConfField
public static int analyze_task_timeout_in_hours = 12;

@ConfField(mutable = true, masterOnly = true, description = {
"是否禁止使用 WITH REOSOURCE 语句创建 Catalog。",
"Whether to disable creating catalog with WITH RESOURCE statement."})
@@ -2123,9 +2118,6 @@ public class Config extends ConfigBase {
@ConfField
public static boolean forbid_running_alter_job = false;

@ConfField
public static int table_stats_health_threshold = 80;

@ConfField(description = {
"暂时性配置项,开启后会自动将所有的olap表修改为可light schema change",
"temporary config filed, will make all olap tables enable light schema change"
@@ -2151,28 +2143,6 @@ public class Config extends ConfigBase {
+ "but it will increase the memory overhead."})
public static int virtual_node_number = 2048;

@ConfField(description = {"控制对大表的自动ANALYZE的最小时间间隔,"
+ "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
"This controls the minimum time interval for automatic ANALYZE on large tables. Within this interval,"
+ "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
public static long huge_table_auto_analyze_interval_in_millis = TimeUnit.HOURS.toMillis(12);

@ConfField(description = {"定义大表的大小下界,在开启enable_auto_sample的情况下,"
+ "大小超过该值的表将会自动通过采样收集统计信息", "This defines the lower size bound for large tables. "
+ "When enable_auto_sample is enabled, tables larger than this value will automatically collect "
+ "statistics through sampling"})
public static long huge_table_lower_bound_size_in_bytes = 5L * 1024 * 1024 * 1024;

@ConfField(description = {"定义开启开启大表自动sample后,对大表的采样比例",
"This defines the number of sample percent for large tables when automatic sampling for"
+ "large tables is enabled"})
public static int huge_table_default_sample_rows = 4194304;

@ConfField(description = {"是否开启大表自动sample,开启后对于大小超过huge_table_lower_bound_size_in_bytes会自动通过采样收集"
+ "统计信息", "Whether to enable automatic sampling for large tables, which, when enabled, automatically"
+ "collects statistics through sampling for tables larger than 'huge_table_lower_bound_size_in_bytes'"})
public static boolean enable_auto_sample = false;

@ConfField(description = {
"控制统计信息的自动触发作业执行记录的持久化行数",
"Determine the persist number of automatic triggered analyze job execution status"
@@ -53,7 +53,6 @@
import org.apache.doris.statistics.AnalysisInfo.AnalysisType;
import org.apache.doris.statistics.BaseAnalysisTask;
import org.apache.doris.statistics.HistogramTask;
import org.apache.doris.statistics.MVAnalysisTask;
import org.apache.doris.statistics.OlapAnalysisTask;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;
@@ -1102,11 +1101,9 @@ public TTableDescriptor toThrift() {
public BaseAnalysisTask createAnalysisTask(AnalysisInfo info) {
if (info.analysisType.equals(AnalysisType.HISTOGRAM)) {
return new HistogramTask(info);
}
if (info.analysisType.equals(AnalysisType.FUNDAMENTALS)) {
} else {
return new OlapAnalysisTask(info);
}
return new MVAnalysisTask(info);
}

public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
@@ -1126,7 +1123,7 @@ public boolean needReAnalyzeTable(TableStatsMeta tblStats) {
}
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
return tblHealth < Config.table_stats_health_threshold;
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
}

@Override
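
The check above now reads the threshold from StatisticsUtil.getTableStatsHealthThreshold() instead of the removed Config.table_stats_health_threshold. The helper itself is not part of this excerpt; a minimal sketch, assuming it simply returns the GLOBAL session variable introduced later in this diff and falls back to the built-in default when no connection context exists, might look like this (class name and fallback behaviour are assumptions, not the actual implementation):

// Hypothetical sketch only; the real StatisticsUtil.getTableStatsHealthThreshold()
// is not shown in this diff.
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;

public class StatisticsUtilSketch {
    public static int getTableStatsHealthThreshold() {
        ConnectContext ctx = ConnectContext.get();
        if (ctx != null && ctx.getSessionVariable() != null) {
            // Use the GLOBAL session variable tableStatsHealthThreshold.
            return ctx.getSessionVariable().tableStatsHealthThreshold;
        }
        // Assumed fallback for code paths without a connection context,
        // e.g. internally scheduled analyze jobs: use the built-in default.
        return new SessionVariable().tableStatsHealthThreshold;
    }
}
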
@@ -571,10 +571,15 @@ private Statistics computeFilter(Filter filter) {
}

private ColumnStatistic getColumnStatistic(TableIf table, String colName) {
ConnectContext connectContext = ConnectContext.get();
if (connectContext != null && connectContext.getSessionVariable().internalSession) {
return ColumnStatistic.UNKNOWN;
}
if (totalColumnStatisticMap.get(table.getName() + colName) != null) {
return totalColumnStatisticMap.get(table.getName() + colName);
} else if (isPlayNereidsDump) {
return ColumnStatistic.UNKNOWN;

} else {
long catalogId;
long dbId;
@@ -57,6 +57,7 @@
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
* System variable.
@@ -410,6 +411,19 @@ public class SessionVariable implements Serializable, Writable {

public static final String FASTER_FLOAT_CONVERT = "faster_float_convert";

public static final String ENABLE_DECIMAL256 = "enable_decimal256";

public static final String STATS_INSERT_MERGE_ITEM_COUNT = "stats_insert_merge_item_count";

public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";

public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS
= "huge_table_auto_analyze_interval_in_millis";

public static final String TABLE_STATS_HEALTH_THRESHOLD
= "table_stats_health_threshold";

public static final List<String> DEBUG_VARIABLES = ImmutableList.of(
SKIP_DELETE_PREDICATE,
SKIP_DELETE_BITMAP,
@@ -463,7 +477,7 @@ public class SessionVariable implements Serializable, Writable {
public int queryTimeoutS = 900;

// analyze timeout in seconds.
@VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, needForward = true)
@VariableMgr.VarAttr(name = ANALYZE_TIMEOUT, flag = VariableMgr.GLOBAL, needForward = true)
public int analyzeTimeoutS = 43200;

// The global max_execution_time value provides the default for the session value for new connections.
@@ -1150,6 +1164,22 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
+ " use a skiplist to optimize the intersection."})
public int invertedIndexConjunctionOptThreshold = 1000;

@VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat",
description = {"该参数定义自动ANALYZE例程的开始时间",
"This parameter defines the start time for the automatic ANALYZE routine."},
flag = VariableMgr.GLOBAL)
public String fullAutoAnalyzeStartTime = "00:00:00";

@VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat",
description = {"该参数定义自动ANALYZE例程的结束时间",
"This parameter defines the end time for the automatic ANALYZE routine."},
flag = VariableMgr.GLOBAL)
public String fullAutoAnalyzeEndTime = "23:59:59";

@VariableMgr.VarAttr(name = SQL_DIALECT, needForward = true, checker = "checkSqlDialect",
description = {"解析sql使用的方言", "The dialect used to parse sql."})
public String sqlDialect = "doris";

@VariableMgr.VarAttr(name = ENABLE_UNIQUE_KEY_PARTIAL_UPDATE, needForward = true)
public boolean enableUniqueKeyPartialUpdate = false;

@@ -1186,6 +1216,48 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) {
"the runtime filter id in IGNORE_RUNTIME_FILTER_IDS list will not be generated"})

public String ignoreRuntimeFilterIds = "";

@VariableMgr.VarAttr(name = STATS_INSERT_MERGE_ITEM_COUNT, flag = VariableMgr.GLOBAL, description = {
"控制统计信息相关INSERT攒批数量", "Controls the batch size for stats INSERT merging."
})
public int statsInsertMergeItemCount = 200;

@VariableMgr.VarAttr(name = HUGE_TABLE_DEFAULT_SAMPLE_ROWS, flag = VariableMgr.GLOBAL, description = {
"定义开启大表自动sample后,对大表的采样行数",
"This defines the number of sample rows for large tables when automatic sampling for "
+ "large tables is enabled"})
public long hugeTableDefaultSampleRows = 4194304;


@VariableMgr.VarAttr(name = HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES, flag = VariableMgr.GLOBAL,
description = {
"大小超过该值的表将会自动通过采样收集统计信息",
"This defines the lower size bound for large tables. "
+ "When enable_auto_sample is enabled, tables"
+ "larger than this value will automatically collect "
+ "statistics through sampling"})
public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;

@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
description = {"控制对大表的自动ANALYZE的最小时间间隔,"
+ "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
"This controls the minimum time interval for automatic ANALYZE on large tables."
+ "Within this interval,"
+ "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
public long hugeTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(12);

@VariableMgr.VarAttr(name = TABLE_STATS_HEALTH_THRESHOLD, flag = VariableMgr.GLOBAL,
description = {"取值在0-100之间,当自上次统计信息收集操作之后"
+ "数据更新量达到 (100 - table_stats_health_threshold)% ,认为该表的统计信息已过时",
"The value should be between 0 and 100. When the data update quantity "
+ "exceeds (100 - table_stats_health_threshold)% since the last "
+ "statistics collection operation, the statistics for this table are"
+ "considered outdated."})
public int tableStatsHealthThreshold = 60;

public static final String IGNORE_RUNTIME_FILTER_IDS = "ignore_runtime_filter_ids";

public Set<Integer> getIgnoredRuntimeFilterIds() {
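
The block above replaces the former fe.conf knobs with GLOBAL session variables, so from this commit on they are tuned at runtime over the MySQL protocol with SET GLOBAL rather than through fe.conf. A hypothetical usage sketch follows; the JDBC URL, port and credentials are placeholders, the MySQL JDBC driver is assumed to be on the classpath, and the values shown are the defaults introduced in this diff:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

// Illustrative sketch: adjust the auto-analyze knobs that are now GLOBAL
// session variables. Host, port, user and password are placeholders.
public class TuneAutoAnalyzeVariables {
    public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection(
                "jdbc:mysql://127.0.0.1:9030", "root", "");
             Statement stmt = conn.createStatement()) {
            stmt.execute("SET GLOBAL table_stats_health_threshold = 60");
            stmt.execute("SET GLOBAL huge_table_default_sample_rows = 4194304");
            // 5 GiB lower bound for treating a table as huge.
            stmt.execute("SET GLOBAL huge_table_lower_bound_size_in_bytes = 5368709120");
            // Analyze huge tables at most once every 12 hours (value in milliseconds).
            stmt.execute("SET GLOBAL huge_table_auto_analyze_interval_in_millis = 43200000");
        }
    }
}
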