From e4b03036c3876d2dfab4e43357f0053fd97816f9 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Mon, 27 Nov 2023 15:09:28 +0800 Subject: [PATCH 01/50] [chore](case) Use correct insert stmt for cold heat separation case #27546 (#27585) Co-authored-by: AlexYue --- .../suites/cold_heat_separation_p2/add_drop_partition.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/suites/cold_heat_separation_p2/add_drop_partition.groovy b/regression-test/suites/cold_heat_separation_p2/add_drop_partition.groovy index 24f2062e04bea9..77874710485e2f 100644 --- a/regression-test/suites/cold_heat_separation_p2/add_drop_partition.groovy +++ b/regression-test/suites/cold_heat_separation_p2/add_drop_partition.groovy @@ -255,7 +255,7 @@ suite("add_drop_partition") { """ sql """ - insert into ${tableName} values(1, "2017-01-01"); + insert into ${tableName} values(1, "2016-01-01"); """ partitions = sql "show partitions from ${tableName}" From 9efff15d4755535663f97ab0bd243d4a05011537 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Mon, 27 Nov 2023 17:43:57 +0800 Subject: [PATCH 02/50] [enhance](S3) Print the error detail for every s3 operation (#27572) (#27615) --- be/src/io/fs/buffered_reader.cpp | 4 ++++ be/src/io/fs/s3_file_reader.cpp | 6 +++-- be/src/io/fs/s3_file_system.cpp | 7 +++--- be/src/io/fs/s3_file_writer.cpp | 38 +++++++++++++++++++++----------- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp index fdcba04190f992..18e638d6d75cea 100644 --- a/be/src/io/fs/buffered_reader.cpp +++ b/be/src/io/fs/buffered_reader.cpp @@ -476,6 +476,10 @@ void PrefetchBuffer::prefetch_buffer() { return; } if (!s.ok() && _offset < _reader->size()) { + // We should print the error msg since this buffer might not be accessed by the consumer + // which would result in the status being missed + LOG_WARNING("prefetch path {} failed, offset {}, error {}", _reader->path().native(), + _offset, s.to_string()); _prefetch_status = std::move(s); } _buffer_status = BufferStatus::PREFETCHED; diff --git a/be/src/io/fs/s3_file_reader.cpp b/be/src/io/fs/s3_file_reader.cpp index 950f9cff172603..a9f1296907990b 100644 --- a/be/src/io/fs/s3_file_reader.cpp +++ b/be/src/io/fs/s3_file_reader.cpp @@ -98,8 +98,10 @@ Status S3FileReader::read_at_impl(size_t offset, Slice result, size_t* bytes_rea auto outcome = client->GetObject(request); s3_bvar::s3_get_total << 1; if (!outcome.IsSuccess()) { - return Status::IOError("failed to read from {}: {}", _path.native(), - outcome.GetError().GetMessage()); + return Status::IOError("failed to read from {}: {}, exception {}, error code {}", + _path.native(), outcome.GetError().GetMessage(), + outcome.GetError().GetExceptionName(), + outcome.GetError().GetResponseCode()); } *bytes_read = outcome.GetResult().GetContentLength(); if (*bytes_read != bytes_req) { diff --git a/be/src/io/fs/s3_file_system.cpp b/be/src/io/fs/s3_file_system.cpp index 82dd7ed2576525..5b70961c747231 100644 --- a/be/src/io/fs/s3_file_system.cpp +++ b/be/src/io/fs/s3_file_system.cpp @@ -538,9 +538,10 @@ Status S3FileSystem::get_key(const Path& path, std::string* key) const { template std::string S3FileSystem::error_msg(const std::string& key, const AwsOutcome& outcome) const { - return fmt::format("(endpoint: {}, bucket: {}, key:{}, {}), {}", _s3_conf.endpoint, - _s3_conf.bucket, key, outcome.GetError().GetExceptionName(), - outcome.GetError().GetMessage()); + return fmt::format("(endpoint: {}, bucket: {}, key:{}, {}), {}, 
error code {}", + _s3_conf.endpoint, _s3_conf.bucket, key, + outcome.GetError().GetExceptionName(), outcome.GetError().GetMessage(), + outcome.GetError().GetResponseCode()); } std::string S3FileSystem::error_msg(const std::string& key, const std::string& err) const { diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index c4a9906f097da8..da24d825a095e3 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -119,8 +119,11 @@ Status S3FileWriter::_create_multi_upload_request() { _upload_id = outcome.GetResult().GetUploadId(); return Status::OK(); } - return Status::IOError("failed to create multipart upload(bucket={}, key={}, upload_id={}): {}", - _bucket, _path.native(), _upload_id, outcome.GetError().GetMessage()); + return Status::IOError( + "failed to create multipart upload(bucket={}, key={}, upload_id={}): {}, exception {}, " + "error code {}", + _bucket, _path.native(), _upload_id, outcome.GetError().GetMessage(), + outcome.GetError().GetExceptionName(), outcome.GetError().GetResponseCode()); } void S3FileWriter::_wait_until_finish(std::string_view task_name) { @@ -171,8 +174,11 @@ Status S3FileWriter::abort() { _aborted = true; return Status::OK(); } - return Status::IOError("failed to abort multipart upload(bucket={}, key={}, upload_id={}): {}", - _bucket, _path.native(), _upload_id, outcome.GetError().GetMessage()); + return Status::IOError( + "failed to abort multipart upload(bucket={}, key={}, upload_id={}): {}, exception {}, " + "error code {}", + _bucket, _path.native(), _upload_id, outcome.GetError().GetMessage(), + outcome.GetError().GetExceptionName(), outcome.GetError().GetResponseCode()); } Status S3FileWriter::close() { @@ -281,9 +287,12 @@ void S3FileWriter::_upload_one_part(int64_t part_num, S3FileBuffer& buf) { UploadPartOutcome upload_part_outcome = upload_part_callable.get(); if (!upload_part_outcome.IsSuccess()) { auto s = Status::IOError( - "failed to upload part (bucket={}, key={}, part_num={}, up_load_id={}): {}", + "failed to upload part (bucket={}, key={}, part_num={}, up_load_id={}): {}, " + "exception {}, error code {}", _bucket, _path.native(), part_num, _upload_id, - upload_part_outcome.GetError().GetMessage()); + upload_part_outcome.GetError().GetMessage(), + upload_part_outcome.GetError().GetExceptionName(), + upload_part_outcome.GetError().GetResponseCode()); LOG_WARNING(s.to_string()); buf._on_failed(s); return; @@ -331,8 +340,11 @@ Status S3FileWriter::_complete() { if (!compute_outcome.IsSuccess()) { auto s = Status::IOError( - "failed to create complete multi part upload (bucket={}, key={}): {}", _bucket, - _path.native(), compute_outcome.GetError().GetMessage()); + "failed to create complete multi part upload (bucket={}, key={}): {}, exception " + "{}, error code {}", + _bucket, _path.native(), compute_outcome.GetError().GetMessage(), + compute_outcome.GetError().GetExceptionName(), + compute_outcome.GetError().GetResponseCode()); LOG_WARNING(s.to_string()); return s; } @@ -371,12 +383,12 @@ void S3FileWriter::_put_object(S3FileBuffer& buf) { auto response = _client->PutObject(request); s3_bvar::s3_put_total << 1; if (!response.IsSuccess()) { - _st = Status::InternalError("Error: [{}:{}, responseCode:{}]", - response.GetError().GetExceptionName(), - response.GetError().GetMessage(), - static_cast(response.GetError().GetResponseCode())); - buf._on_failed(_st); + _st = Status::InternalError( + "failed to put object (bucket={}, key={}), Error: [{}:{}, responseCode:{}]", + _bucket, 
_path.native(), response.GetError().GetExceptionName(), + response.GetError().GetMessage(), response.GetError().GetResponseCode()); LOG(WARNING) << _st; + buf._on_failed(_st); return; } _bytes_written += buf.get_size(); From c0e4e12fcbe0ca3107cb0439fa1e573dc651a463 Mon Sep 17 00:00:00 2001 From: xzj7019 <131111794+xzj7019@users.noreply.github.com> Date: Mon, 27 Nov 2023 20:43:31 +0800 Subject: [PATCH 03/50] [nereids] fix stats error when using dateTime type filter #27571 (#27577) --- .../doris/nereids/types/DateTimeType.java | 21 ++++++++ .../doris/nereids/types/DateTimeV2Type.java | 20 ++++++++ .../apache/doris/nereids/types/DateType.java | 21 ++++++++ .../doris/nereids/types/DateV2Type.java | 21 ++++++++ .../nereids/types/coercion/DateLikeType.java | 32 ++++++------ .../test_datetime_filter_stats0.groovy | 49 +++++++++++++++++++ 6 files changed, 150 insertions(+), 14 deletions(-) create mode 100644 regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java index b59e47d3eb2225..f57e29eab9358f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java @@ -20,6 +20,10 @@ import org.apache.doris.catalog.Type; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; + /** * Datetime type in Nereids. */ @@ -46,4 +50,21 @@ public boolean equals(Object o) { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDateTime to = toLocalDateTime(high); + LocalDateTime from = toLocalDateTime(low); + return ChronoUnit.SECONDS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java index 12b8be5959ac55..77891ed3486525 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java @@ -27,6 +27,9 @@ import com.google.common.base.Preconditions; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; import java.util.Objects; /** @@ -127,4 +130,21 @@ public int width() { public int getScale() { return scale; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDateTime to = toLocalDateTime(high); + LocalDateTime from = toLocalDateTime(low); + return ChronoUnit.SECONDS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java index a33fec9d1c3167..a1d16246f6aa5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java @@ -20,6 +20,10 @@ import org.apache.doris.catalog.Type; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDate; +import java.time.temporal.ChronoUnit; + /** * Date type in Nereids. */ @@ -41,5 +45,22 @@ public Type toCatalogDataType() { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDate to = toLocalDate(high); + LocalDate from = toLocalDate(low); + return ChronoUnit.DAYS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java index 9cf5efdbb6c4d8..0437fb0365ae58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java @@ -20,6 +20,10 @@ import org.apache.doris.catalog.Type; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDate; +import java.time.temporal.ChronoUnit; + /** * Date type in Nereids. */ @@ -41,5 +45,22 @@ public Type toCatalogDataType() { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDate to = toLocalDate(high); + LocalDate from = toLocalDate(low); + return ChronoUnit.DAYS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java index acbce88e5e294f..22ea99f00bc875 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java @@ -27,31 +27,35 @@ import org.apache.doris.nereids.types.DateType; import org.apache.doris.nereids.types.DateV2Type; -import java.time.temporal.ChronoUnit; -import java.util.Calendar; +import java.time.LocalDate; +import java.time.LocalDateTime; /** * date like type. 
*/ public abstract class DateLikeType extends PrimitiveType { - private Calendar toCalendar(double d) { - //d = (year * 10000 + month * 100 + day) * 1000000L; + + protected LocalDate toLocalDate(double d) { + // d = (year * 10000 + month * 100 + day) * 1000000L; int date = (int) (d / 1000000); int day = date % 100; int month = (date / 100) % 100; int year = date / 10000; - Calendar calendar = Calendar.getInstance(); - calendar.set(Calendar.YEAR, year); - calendar.set(Calendar.MONTH, month); - calendar.set(Calendar.DAY_OF_MONTH, day); - return calendar; + return LocalDate.of(year, month, day); } - @Override - public double rangeLength(double high, double low) { - Calendar to = toCalendar(high); - Calendar from = toCalendar(low); - return ChronoUnit.DAYS.between(from.toInstant(), to.toInstant()); + protected LocalDateTime toLocalDateTime(double d) { + // d = (year * 10000 + month * 100 + day) * 1000000L + time + // time = (hour * 10000 + minute * 100 + second); + int date = (int) (d / 1000000); + int day = date % 100; + int month = (date / 100) % 100; + int year = date / 10000; + int time = (int) (d % 1000000); + int second = time % 100; + int minute = (time / 100) % 100; + int hour = time / 10000; + return LocalDateTime.of(year, month, day, hour, minute, second); } /** diff --git a/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy b/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy new file mode 100644 index 00000000000000..317645e89d969c --- /dev/null +++ b/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
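+
+// Regression case for #27571: the old Calendar-based DateLikeType.rangeLength measured
+// every date-like range in DAYS (and set the 1-based month into the 0-based
+// Calendar.MONTH field), so short DATETIME filter ranges yielded bogus row-count
+// estimates. The assertions below check that such an estimate (presumably the old
+// "stats=2.24" value) no longer appears in the plan.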
+ +suite("test_datetime_filter_stats0") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql "DROP TABLE IF EXISTS test_datetime_filter_stats0" + sql """ CREATE TABLE `test_datetime_filter_stats0` ( + `id` int(11), + `is_delete` int, + `company_id` int, + `book_time` DATETIMEV2 + )ENGINE=OLAP + unique key (id) + distributed by hash(id) buckets 10 + properties( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ alter table test_datetime_filter_stats0 modify column id set stats('row_count'='52899687', 'ndv'='52899687', 'num_nulls'='0', 'min_value'='1', 'max_value'='52899687', 'data_size'='4'); """ + sql """ alter table test_datetime_filter_stats0 modify column book_time set stats('row_count'='52899687', 'ndv'='23622730', 'num_nulls'='0', 'min_value'='2002-01-01 00:45:39', 'max_value'='2027-09-25 23:03:00', 'data_size'='10'); """ + sql """ alter table test_datetime_filter_stats0 modify column is_delete set stats('row_count'='52899687', 'ndv'='2', 'num_nulls'='0', 'min_value'='0', 'max_value'='1', 'data_size'='4'); """ + sql """ alter table test_datetime_filter_stats0 modify column company_id set stats('row_count'='52899687', 'ndv'='7559', 'num_nulls'='0', 'min_value'='2', 'max_value'='876981', 'data_size'='4'); """ + + explain { + sql("physical plan select count(1) from test_datetime_filter_stats0 o where o.book_time >= '2020-03-01 00:00:00.0' and o.book_time <= '2020-03-01 23:59:59.0';"); + notContains"stats=2.24" + } + + explain { + sql("physical plan select count(1) from test_datetime_filter_stats0 o where o.book_time >= '2020-03-01 00:00:00.0' and o.book_time <= '2020-03-01 00:00:01.0';"); + notContains"stats=2.24" + } +} From c75ceb19b02d670b205bcfea0e54df2913bdd57c Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Mon, 27 Nov 2023 20:44:03 +0800 Subject: [PATCH 04/50] [fix](planner)sort node should materialized required slots for itself #27605 (#27620) --- .../main/java/org/apache/doris/planner/JoinNodeBase.java | 3 --- .../src/main/java/org/apache/doris/planner/SortNode.java | 7 +++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java index 2a0a272e48838b..b635cfda59d992 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/JoinNodeBase.java @@ -275,9 +275,6 @@ public void initOutputSlotIds(Set requiredSlotIdSet, Analyzer analyzer) SlotId firstMaterializedSlotId = null; for (TupleDescriptor tupleDescriptor : outputTupleDescList) { for (SlotDescriptor slotDescriptor : tupleDescriptor.getSlots()) { - if ((requiredSlotIdSet != null && requiredSlotIdSet.contains(slotDescriptor.getId()))) { - slotDescriptor.setIsMaterialized(true); - } if (slotDescriptor.isMaterialized()) { if ((requiredSlotIdSet == null || requiredSlotIdSet.contains(slotDescriptor.getId()))) { outputSlotIds.add(slotDescriptor.getId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index 70b76fa07b7b75..375e37edd8f565 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -269,6 +269,13 @@ public void getMaterializedIds(Analyzer analyzer, List ids) { 
Expr.getIds(info.getOrderingExprs(), null, ids); } + @Override + public void initOutputSlotIds(Set requiredSlotIdSet, Analyzer analyzer) { + // need call materializeRequiredSlots again to make sure required slots is materialized by children + // requiredSlotIdSet parameter means nothing for sort node, just call materializeRequiredSlots is enough + info.materializeRequiredSlots(analyzer, outputSmap); + } + private void removeUnusedExprs() { if (!isUnusedExprRemoved) { if (resolvedTupleExprs != null) { From ec55ad0a4849ee5a76ba21c4492f1728a6047d32 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Mon, 27 Nov 2023 20:44:42 +0800 Subject: [PATCH 05/50] [fix](Nereids) non-deterministic expression should not be constant (#27606) (#27631) --- .../org/apache/doris/nereids/trees/expressions/Expression.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java index 2fd89cbfa4e488..d26e3b3e9f4ed2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Expression.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.AbstractTreeNode; import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; +import org.apache.doris.nereids.trees.expressions.functions.Nondeterministic; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; import org.apache.doris.nereids.trees.expressions.shape.LeafExpression; @@ -141,7 +142,7 @@ public boolean isConstant() { if (this instanceof LeafExpression) { return this instanceof Literal; } else { - return children().stream().allMatch(Expression::isConstant); + return !(this instanceof Nondeterministic) && children().stream().allMatch(Expression::isConstant); } } From 66042d0d332c2335ded1ba29e87686df5fd1ab4c Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Tue, 28 Nov 2023 00:59:31 +0900 Subject: [PATCH 06/50] [enhancement](stats) Add process for aggstate type #27640 (#27642) --- .../java/org/apache/doris/analysis/AnalyzeTblStmt.java | 3 +-- .../org/apache/doris/statistics/util/StatisticsUtil.java | 4 +++- .../suites/statistics/test_agg_complex_type.groovy | 8 +++++--- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java index f69787f7e32c05..a88cd137ad53d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeTblStmt.java @@ -34,7 +34,6 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.statistics.AnalysisInfo.AnalysisType; -import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Sets; @@ -193,7 +192,7 @@ private void checkColumn() throws AnalysisException { ErrorReport.reportAnalysisException(ErrorCode.ERR_WRONG_COLUMN_NAME, colName, FeNameFormat.getColumnNameRegex()); } - if 
(ColumnStatistic.UNSUPPORTED_TYPE.contains(column.getType())) { + if (StatisticsUtil.isUnsupportedType(column.getType())) { containsUnsupportedTytpe = true; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index f2c77026312d6d..fe04879e156e38 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -31,6 +31,7 @@ import org.apache.doris.analysis.TableName; import org.apache.doris.analysis.UserIdentity; import org.apache.doris.analysis.VariableExpr; +import org.apache.doris.catalog.AggStateType; import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; @@ -759,7 +760,8 @@ public static boolean isUnsupportedType(Type type) { return type instanceof ArrayType || type instanceof StructType || type instanceof MapType - || type instanceof VariantType; + || type instanceof VariantType + || type instanceof AggStateType; } public static void sleep(long millis) { diff --git a/regression-test/suites/statistics/test_agg_complex_type.groovy b/regression-test/suites/statistics/test_agg_complex_type.groovy index 55af87f35bd632..4800959fe545be 100644 --- a/regression-test/suites/statistics/test_agg_complex_type.groovy +++ b/regression-test/suites/statistics/test_agg_complex_type.groovy @@ -17,12 +17,14 @@ suite("test_analyze_with_agg_complex_type") { sql """drop table if exists test_agg_complex_type;""" + sql """set enable_agg_state=true""" sql """create table test_agg_complex_type ( datekey int, device_id bitmap BITMAP_UNION NULL, hll_test hll hll_union, - qs QUANTILE_STATE QUANTILE_UNION + qs QUANTILE_STATE QUANTILE_UNION, + agg_st_1 agg_state max_by(int ,int) ) aggregate key (datekey) distributed by hash(datekey) buckets 1 @@ -30,9 +32,9 @@ suite("test_analyze_with_agg_complex_type") { "replication_num" = "1" );""" - sql """insert into test_agg_complex_type values (1,to_bitmap(1), hll_hash("11"), TO_QUANTILE_STATE("11", 1.0));""" + sql """insert into test_agg_complex_type values (1,to_bitmap(1), hll_hash("11"), TO_QUANTILE_STATE("11", 1.0), max_by_state(1,2));""" - sql """insert into test_agg_complex_type values (2, to_bitmap(1), hll_hash("12"), TO_QUANTILE_STATE("11", 1.0));""" + sql """insert into test_agg_complex_type values (2, to_bitmap(1), hll_hash("12"), TO_QUANTILE_STATE("11", 1.0), max_by_state(1,2));""" sql """ANALYZE TABLE test_agg_complex_type WITH SYNC""" From f2b1c4ee2fc26f2b74776c1add1e81864bd849ba Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 28 Nov 2023 08:36:11 +0800 Subject: [PATCH 07/50] [Fix](statistics)Fix bug and improve auto analyze. (#27626) (#27657) 1. Implement needReAnalyzeTable for ExternalTable. For now, external table will not be reanalyzed in 10 days. 2. For HiveMetastoreCache.loadPartitions, handle the empty iterator case to avoid Index out of boundary exception. 3. Wrap handle show analyze loop with try catch, so that when one table failed (for example, catalog dropped so the table couldn't be found anymore), we can still show the other tables. 4. For now, only OlapTable and Hive HMSExternalTable support sample analyze, throw exception for other types of table. 5. In StatisticsCollector, call constructJob after createTableLevelTaskForExternalTable to avoid NPE. 
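Illustration for item 2 (an editorial sketch, not part of the patch): the new guard in
loadPartitions matters because Guava's Iterables.get(keys, 0), which is used to peek at
the first cache key, throws on an empty iterable:

    import com.google.common.collect.Iterables;
    import java.util.Collections;

    public class EmptyKeysDemo {
        public static void main(String[] args) {
            // Works once at least one key exists.
            System.out.println(Iterables.get(Collections.singletonList("k"), 0));
            // Throws IndexOutOfBoundsException, the "Index out of boundary"
            // failure that the early return for null/empty key sets now avoids.
            Iterables.get(Collections.emptyList(), 0);
        }
    }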
--- .../doris/catalog/external/ExternalTable.java | 15 +++- .../datasource/hive/HiveMetaStoreCache.java | 5 +- .../org/apache/doris/qe/SessionVariable.java | 9 +++ .../org/apache/doris/qe/ShowExecutor.java | 74 +++++++++++-------- .../doris/statistics/AnalysisManager.java | 23 ++++++ .../doris/statistics/StatisticConstants.java | 2 + .../statistics/StatisticsAutoCollector.java | 19 ++++- .../doris/statistics/StatisticsCollector.java | 2 +- .../doris/statistics/util/StatisticsUtil.java | 10 +++ 9 files changed, 120 insertions(+), 39 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java index 4eab7ebf813620..3db23462c204d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/ExternalTable.java @@ -388,9 +388,18 @@ public void gsonPostProcess() throws IOException { @Override public boolean needReAnalyzeTable(TableStatsMeta tblStats) { - // TODO: Find a way to decide if this external table need to be reanalyzed. - // For now, simply return true for all external tables. - return true; + if (tblStats == null) { + return true; + } + if (!tblStats.analyzeColumns().containsAll(getBaseSchema() + .stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName) + .collect(Collectors.toSet()))) { + return true; + } + return System.currentTimeMillis() + - tblStats.updatedTime > StatisticsUtil.getExternalTableAutoAnalyzeIntervalInMillis(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index f92c2e545ac6f0..0ab5179ffa5b89 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -320,6 +320,10 @@ private HivePartition loadPartition(PartitionCacheKey key) { } private Map loadPartitions(Iterable keys) { + Map ret = new HashMap<>(); + if (keys == null || !keys.iterator().hasNext()) { + return ret; + } PartitionCacheKey oneKey = Iterables.get(keys, 0); String dbName = oneKey.getDbName(); String tblName = oneKey.getTblName(); @@ -341,7 +345,6 @@ private Map loadPartitions(Iterable partitions = catalog.getClient().getPartitions(dbName, tblName, partitionNames); // Compose the return result map. 
- Map ret = new HashMap<>(); for (Partition partition : partitions) { StorageDescriptor sd = partition.getSd(); ret.put(new PartitionCacheKey(dbName, tblName, partition.getValues()), diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index fa8f13512e1c0d..f5f139a66d8658 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -427,6 +427,9 @@ public class SessionVariable implements Serializable, Writable { public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = "huge_table_auto_analyze_interval_in_millis"; + public static final String EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS + = "external_table_auto_analyze_interval_in_millis"; + public static final String TABLE_STATS_HEALTH_THRESHOLD = "table_stats_health_threshold"; @@ -1249,6 +1252,12 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { + "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."}) public long hugeTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(12); + @VariableMgr.VarAttr(name = EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL, + description = {"控制对外表的自动ANALYZE的最小时间间隔,在该时间间隔内的外表仅ANALYZE一次", + "This controls the minimum time interval for automatic ANALYZE on external tables." + + "Within this interval, external tables are analyzed only once."}) + public long externalTableAutoAnalyzeIntervalInMillis = TimeUnit.HOURS.toMillis(24); + @VariableMgr.VarAttr(name = TABLE_STATS_HEALTH_THRESHOLD, flag = VariableMgr.GLOBAL, description = {"取值在0-100之间,当自上次统计信息收集操作之后" + "数据更新量达到 (100 - table_stats_health_threshold)% ,认为该表的统计信息已过时", diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index 575d1ed6270823..45f3fc8030dcee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -2590,37 +2590,51 @@ private void handleShowAnalyze() { List> resultRows = Lists.newArrayList(); DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); for (AnalysisInfo analysisInfo : results) { - List row = new ArrayList<>(); - row.add(String.valueOf(analysisInfo.jobId)); - CatalogIf> c = StatisticsUtil.findCatalog(analysisInfo.catalogId); - row.add(c.getName()); - Optional> databaseIf = c.getDb(analysisInfo.dbId); - row.add(databaseIf.isPresent() ? databaseIf.get().getFullName() : "DB may get deleted"); - if (databaseIf.isPresent()) { - Optional table = databaseIf.get().getTable(analysisInfo.tblId); - row.add(table.isPresent() ? table.get().getName() : "Table may get deleted"); - } else { - row.add("DB may get deleted"); + try { + List row = new ArrayList<>(); + row.add(String.valueOf(analysisInfo.jobId)); + CatalogIf> c + = StatisticsUtil.findCatalog(analysisInfo.catalogId); + row.add(c.getName()); + Optional> databaseIf = c.getDb(analysisInfo.dbId); + row.add(databaseIf.isPresent() ? databaseIf.get().getFullName() : "DB may get deleted"); + if (databaseIf.isPresent()) { + Optional table = databaseIf.get().getTable(analysisInfo.tblId); + row.add(table.isPresent() ? 
table.get().getName() : "Table may get deleted"); + } else { + row.add("DB may get deleted"); + } + row.add(analysisInfo.colName); + row.add(analysisInfo.jobType.toString()); + row.add(analysisInfo.analysisType.toString()); + row.add(analysisInfo.message); + row.add(TimeUtils.DATETIME_FORMAT.format( + LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.lastExecTimeInMs), + ZoneId.systemDefault()))); + row.add(analysisInfo.state.toString()); + try { + row.add(showStmt.isAuto() + ? analysisInfo.progress + : Env.getCurrentEnv().getAnalysisManager().getJobProgress(analysisInfo.jobId)); + } catch (Exception e) { + row.add("N/A"); + LOG.warn("Failed to get progress for job: {}", analysisInfo, e); + } + row.add(analysisInfo.scheduleType.toString()); + LocalDateTime startTime = + LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.startTime), + java.time.ZoneId.systemDefault()); + LocalDateTime endTime = + LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.endTime), + java.time.ZoneId.systemDefault()); + row.add(startTime.format(formatter)); + row.add(endTime.format(formatter)); + resultRows.add(row); + } catch (Exception e) { + LOG.warn("Failed to get analyze info for table {}.{}.{}, reason: {}", + analysisInfo.catalogId, analysisInfo.dbId, analysisInfo.tblId, e.getMessage()); + continue; } - row.add(analysisInfo.colName); - row.add(analysisInfo.jobType.toString()); - row.add(analysisInfo.analysisType.toString()); - row.add(analysisInfo.message); - row.add(TimeUtils.DATETIME_FORMAT.format( - LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.lastExecTimeInMs), - ZoneId.systemDefault()))); - row.add(analysisInfo.state.toString()); - row.add(Env.getCurrentEnv().getAnalysisManager().getJobProgress(analysisInfo.jobId)); - row.add(analysisInfo.scheduleType.toString()); - LocalDateTime startTime = - LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.createTime), - java.time.ZoneId.systemDefault()); - LocalDateTime endTime = - LocalDateTime.ofInstant(Instant.ofEpochMilli(analysisInfo.endTime), - java.time.ZoneId.systemDefault()); - row.add(startTime.format(formatter)); - row.add(endTime.format(formatter)); - resultRows.add(row); } resultSet = new ShowResultSet(showStmt.getMetaData(), resultRows); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 51247c9ea12820..e987130b53e0ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.View; @@ -328,6 +329,12 @@ protected AnalysisInfo buildAndAssignJob(AnalyzeTblStmt stmt) throws DdlExceptio // No statistics need to be collected or updated return null; } + // Only OlapTable and Hive HMSExternalTable support sample analyze. 
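+        // Sampling on any other table type (e.g. a JDBC or Iceberg external table)
+        // is rejected up front here instead of failing later inside the analysis task.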
+ if ((stmt.getSamplePercent() > 0 || stmt.getSampleRows() > 0) && !canSample(stmt.getTable())) { + String message = String.format("Table %s doesn't support sample analyze.", stmt.getTable().getName()); + LOG.info(message); + throw new DdlException(message); + } boolean isSync = stmt.isSync(); Map analysisTaskInfos = new HashMap<>(); @@ -1077,4 +1084,20 @@ public void removeJob(long id) { public boolean hasUnFinished() { return !analysisJobIdToTaskMap.isEmpty(); } + + /** + * Only OlapTable and Hive HMSExternalTable can sample for now. + * @param table + * @return Return true if the given table can do sample analyze. False otherwise. + */ + public boolean canSample(TableIf table) { + if (table instanceof OlapTable) { + return true; + } + if (table instanceof HMSExternalTable + && ((HMSExternalTable) table).getDlaType().equals(HMSExternalTable.DLAType.HIVE)) { + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index ee07d52d3b2632..9f1bd3bf681896 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -90,6 +90,8 @@ public class StatisticConstants { public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12); + public static final long EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(24); + public static final int TABLE_STATS_HEALTH_THRESHOLD = 60; public static final int ANALYZE_TIMEOUT_IN_SEC = 43200; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 93527bd33193d3..52299bd301c5f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -22,7 +22,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.external.ExternalTable; +import org.apache.doris.catalog.external.HMSExternalTable; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.util.TimeUtils; @@ -107,17 +107,28 @@ public void analyzeDb(DatabaseIf databaseIf) throws DdlException { protected List constructAnalysisInfo(DatabaseIf db) { List analysisInfos = new ArrayList<>(); for (TableIf table : db.getTables()) { - if (skip(table)) { + try { + if (skip(table)) { + continue; + } + createAnalyzeJobForTbl(db, analysisInfos, table); + } catch (Throwable t) { + LOG.warn("Failed to analyze table {}.{}.{}", + db.getCatalog().getName(), db.getFullName(), table.getName(), t); continue; } - createAnalyzeJobForTbl(db, analysisInfos, table); } return analysisInfos; } // return true if skip auto analyze this time. protected boolean skip(TableIf table) { - if (!(table instanceof OlapTable || table instanceof ExternalTable)) { + if (!(table instanceof OlapTable || table instanceof HMSExternalTable)) { + return true; + } + // For now, only support Hive HMS table auto collection. 
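+        // HMS tables whose DLA type is not HIVE (e.g. Hudi or Iceberg tables registered
+        // in the metastore) are likewise skipped by auto collection for now.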
+ if (table instanceof HMSExternalTable + && !((HMSExternalTable) table).getDlaType().equals(HMSExternalTable.DLAType.HIVE)) { return true; } if (table.getDataSize(true) < StatisticsUtil.getHugeTableLowerBoundSizeInBytes() * 5) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java index f71d589d4ef537..63dcdab09ab4d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java @@ -78,10 +78,10 @@ protected void createSystemAnalysisJob(AnalysisInfo jobInfo) Map analysisTasks = new HashMap<>(); AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); - Env.getCurrentEnv().getAnalysisManager().constructJob(jobInfo, analysisTasks.values()); if (StatisticsUtil.isExternalTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId)) { analysisManager.createTableLevelTaskForExternalTable(jobInfo, analysisTasks, false); } + Env.getCurrentEnv().getAnalysisManager().constructJob(jobInfo, analysisTasks.values()); Env.getCurrentEnv().getAnalysisManager().registerSysJob(jobInfo, analysisTasks); analysisTasks.values().forEach(analysisTaskExecutor::submitTask); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index fe04879e156e38..212e61fa10aac8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -906,6 +906,16 @@ public static long getHugeTableAutoAnalyzeIntervalInMillis() { return StatisticConstants.HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS; } + public static long getExternalTableAutoAnalyzeIntervalInMillis() { + try { + return findConfigFromGlobalSessionVar(SessionVariable.EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS) + .externalTableAutoAnalyzeIntervalInMillis; + } catch (Exception e) { + LOG.warn("Failed to get value of externalTableAutoAnalyzeIntervalInMillis, return default", e); + } + return StatisticConstants.EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS; + } + public static long getTableStatsHealthThreshold() { try { return findConfigFromGlobalSessionVar(SessionVariable.TABLE_STATS_HEALTH_THRESHOLD) From e953e8c0c909481b869757a7bf4c66a511249c08 Mon Sep 17 00:00:00 2001 From: yiguolei <676222867@qq.com> Date: Tue, 28 Nov 2023 10:17:30 +0800 Subject: [PATCH 08/50] [profile](bugfix) should not cache profile content because the profile may not be a full profile (#27635) --------- Co-authored-by: yiguolei --- .../apache/doris/common/util/ProfileManager.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java index ae6eb311619af7..1a544583beddc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/ProfileManager.java @@ -71,8 +71,6 @@ public ProfileElement(RuntimeProfile profile) { } private final RuntimeProfile profile; - // cache the result of getProfileContent method - private volatile String profileContent; public Map infoStrings = Maps.newHashMap(); public 
MultiProfileTreeBuilder builder = null; public String errMsg = ""; @@ -81,12 +79,8 @@ public ProfileElement(RuntimeProfile profile) { // lazy load profileContent because sometimes profileContent is very large public String getProfileContent() { - if (profileContent != null) { - return profileContent; - } // no need to lock because the possibility of concurrent read is very low - profileContent = profile.toString(); - return profileContent; + return profile.toString(); } public String getProfileBrief() { @@ -103,7 +97,8 @@ public void setStatsErrorEstimator(StatsErrorEstimator statsErrorEstimator) { } } - // only protect queryIdDeque; queryIdToProfileMap is concurrent, no need to protect + // only protect queryIdDeque; queryIdToProfileMap is concurrent, no need to + // protect private ReentrantReadWriteLock lock; private ReadLock readLock; private WriteLock writeLock; @@ -165,7 +160,8 @@ public void pushProfile(RuntimeProfile profile) { ProfileElement element = createElement(profile); // 'insert into' does have job_id, put all profiles key with query_id String key = element.infoStrings.get(SummaryProfile.PROFILE_ID); - // check when push in, which can ensure every element in the list has QUERY_ID column, + // check when push in, which can ensure every element in the list has QUERY_ID + // column, // so there is no need to check when remove element from list. if (Strings.isNullOrEmpty(key)) { LOG.warn("the key or value of Map is null, " From eed37d2173dc2ba853e50d3bd7114b4a9dda8352 Mon Sep 17 00:00:00 2001 From: DuRipeng <453243496@qq.com> Date: Tue, 28 Nov 2023 10:33:15 +0800 Subject: [PATCH 09/50] [Enhance](fe) Support setting initial root password when FE firstly launch (#27438) (#27603) --- docs/en/docs/admin-manual/config/fe-config.md | 12 +++++++++ .../docs/admin-manual/config/fe-config.md | 12 +++++++++ .../java/org/apache/doris/common/Config.java | 12 +++++++++ .../java/org/apache/doris/catalog/Env.java | 2 ++ .../apache/doris/mysql/privilege/Auth.java | 25 +++++++++++++++++++ .../doris/mysql/privilege/AuthTest.java | 18 +++++++++++++ 6 files changed, 81 insertions(+) diff --git a/docs/en/docs/admin-manual/config/fe-config.md b/docs/en/docs/admin-manual/config/fe-config.md index 56f77d3f7743f4..a34e3a56a62cdc 100644 --- a/docs/en/docs/admin-manual/config/fe-config.md +++ b/docs/en/docs/admin-manual/config/fe-config.md @@ -376,6 +376,18 @@ Is it a configuration item unique to the Master FE node: true Whether to enable the multi-tags function of a single BE +#### `initial_root_password` + +Set root user initial 2-staged SHA-1 encrypted password, default as '', means no root password. Subsequent `set password` operations for root user will overwrite the initial root password. + +Example: If you want to configure a plaintext password `root@123`. You can execute Doris SQL `select password('root@123')` to generate encrypted password `*A00C34073A26B40AB4307650BFB9309D6BFA6999`. 
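+
+For instance (illustrative usage), putting `initial_root_password = *A00C34073A26B40AB4307650BFB9309D6BFA6999` into fe.conf before the Master FE first starts makes `root@123` the initial root password.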
+ +Default: empty string + +Is it possible to dynamically configure: false + +Is it a configuration item unique to the Master FE node: true + ### Service #### `query_port` diff --git a/docs/zh-CN/docs/admin-manual/config/fe-config.md b/docs/zh-CN/docs/admin-manual/config/fe-config.md index cee124b99805eb..f1ebb92935db72 100644 --- a/docs/zh-CN/docs/admin-manual/config/fe-config.md +++ b/docs/zh-CN/docs/admin-manual/config/fe-config.md @@ -376,6 +376,18 @@ heartbeat_mgr 中处理心跳事件的线程数。 是否开启单BE的多标签功能 +#### `initial_root_password` + +设置 root 用户初始化2阶段 SHA-1 加密密码,默认为'',即不设置 root 密码。后续 root 用户的 `set password` 操作会将 root 初始化密码覆盖。 + +示例:如要配置密码的明文是 `root@123`,可在Doris执行SQL `select password('root@123')` 获取加密密码 `*A00C34073A26B40AB4307650BFB9309D6BFA6999`。 + +默认值:空字符串 + +是否可以动态配置:false + +是否为 Master FE 节点独有的配置项:true + ### 服务 #### `query_port` diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 6b5626fedbade8..92fa8130757be3 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2210,6 +2210,18 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int publish_topic_info_interval_ms = 30000; // 30s + @ConfField(masterOnly = true, description = { + "设置 root 用户初始化2阶段 SHA-1 加密密码,默认为'',即不设置 root 密码。" + + "后续 root 用户的 `set password` 操作会将 root 初始化密码覆盖。" + + "示例:如要配置密码的明文是 `root@123`,可在Doris执行SQL `select password('root@123')` " + + "获取加密密码 `*A00C34073A26B40AB4307650BFB9309D6BFA6999`", + "Set root user initial 2-staged SHA-1 encrypted password, default as '', means no root password. " + + "Subsequent `set password` operations for root user will overwrite the initial root password. " + + "Example: If you want to configure a plaintext password `root@123`." + + "You can execute Doris SQL `select password('root@123')` to generate encrypted " + + "password `*A00C34073A26B40AB4307650BFB9309D6BFA6999`"}) + public static String initial_root_password = ""; + @ConfField(description = { "限制fe节点thrift server可以接收的最大包大小,默认20M,设置为-1表示不限制", "the max package size fe thrift server can receive,avoid accepting error" diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 402ef8174087d6..3a7706fa613668 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -1366,6 +1366,8 @@ private void transferToMaster() { editLog.logAddFirstFrontend(self); initLowerCaseTableNames(); + // Set initial root password if master FE first time launch. + auth.setInitialRootPassword(Config.initial_root_password); } else { if (journalVersion <= FeMetaVersion.VERSION_114) { // if journal version is less than 114, which means it is upgraded from version before 2.0. 
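(Editorial note, assuming Doris follows the standard MySQL PASSWORD() convention that
MysqlPassword.checkPassword validates below: the "2-staged SHA-1" value is '*' followed
by the 40 uppercase hex digits of SHA1(SHA1(plaintext)). A self-contained sketch:)

    import java.nio.charset.StandardCharsets;
    import java.security.MessageDigest;

    public class TwoStageSha1 {
        // For "root@123" this should print the sample hash quoted in the docs above,
        // *A00C34073A26B40AB4307650BFB9309D6BFA6999.
        static String scramble(String plain) throws Exception {
            MessageDigest sha1 = MessageDigest.getInstance("SHA-1");
            byte[] stage2 = sha1.digest(sha1.digest(plain.getBytes(StandardCharsets.UTF_8)));
            StringBuilder sb = new StringBuilder("*");
            for (byte b : stage2) {
                sb.append(String.format("%02X", b)); // hex-encode each byte
            }
            return sb.toString();
        }

        public static void main(String[] args) throws Exception {
            System.out.println(scramble("root@123"));
        }
    }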
diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/Auth.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/Auth.java index 3d0c119f480074..3696245d374b5d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/Auth.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/Auth.java @@ -55,6 +55,7 @@ import org.apache.doris.ldap.LdapManager; import org.apache.doris.ldap.LdapUserInfo; import org.apache.doris.load.DppConfig; +import org.apache.doris.mysql.MysqlPassword; import org.apache.doris.persist.AlterUserOperationLog; import org.apache.doris.persist.LdapInfo; import org.apache.doris.persist.PrivInfo; @@ -70,6 +71,7 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -1349,6 +1351,29 @@ private void initUser() { } } + public void setInitialRootPassword(String initialRootPassword) { + // Skip set root password if `initial_root_password` set to empty string + if (StringUtils.isEmpty(initialRootPassword)) { + return; + } + byte[] scramble; + try { + scramble = MysqlPassword.checkPassword(initialRootPassword); + } catch (AnalysisException e) { + // Skip set root password if `initial_root_password` is not valid 2-staged SHA-1 encrypted + LOG.warn("initial_root_password [{}] is not valid 2-staged SHA-1 encrypted, ignore it", + initialRootPassword); + return; + } + UserIdentity rootUser = new UserIdentity(ROOT_USER, "%"); + rootUser.setIsAnalyzed(); + try { + setPasswordInternal(rootUser, scramble, null, false, false, false); + } catch (DdlException e) { + LOG.warn("Fail to set initial root password, ignore it", e); + } + } + public List> getRoleInfo() { readLock(); try { diff --git a/fe/fe-core/src/test/java/org/apache/doris/mysql/privilege/AuthTest.java b/fe/fe-core/src/test/java/org/apache/doris/mysql/privilege/AuthTest.java index 10fa234607d3dc..8e7a0508dbf503 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mysql/privilege/AuthTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mysql/privilege/AuthTest.java @@ -39,6 +39,7 @@ import org.apache.doris.common.ExceptionChecker; import org.apache.doris.common.UserException; import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.mysql.MysqlPassword; import org.apache.doris.persist.EditLog; import org.apache.doris.persist.PrivInfo; import org.apache.doris.qe.ConnectContext; @@ -2335,4 +2336,21 @@ public void testShowViewPriv() throws UserException { Lists.newArrayList(new AccessPrivilegeWithCols(AccessPrivilege.DROP_PRIV))); revoke(revokeStmt); } + + @Test + public void testSetInitialRootPassword() { + // Skip set root password if `initial_root_password` set to empty string + auth.setInitialRootPassword(""); + Assert.assertTrue( + auth.checkPlainPasswordForTest("root", "192.168.0.1", null, null)); + // Skip set root password if `initial_root_password` is not valid 2-staged SHA-1 encrypted + auth.setInitialRootPassword("invalidRootPassword"); + Assert.assertTrue( + auth.checkPlainPasswordForTest("root", "192.168.0.1", null, null)); + // Set initial root password + byte[] scrambled = MysqlPassword.makeScrambledPassword("validRootPassword"); + auth.setInitialRootPassword(new String(scrambled)); + Assert.assertTrue( + auth.checkPlainPasswordForTest("root", "192.168.0.1", "validRootPassword", null)); + } } From 
15d7b17c81712a399edb6c173333b2eeb818b5f4 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Tue, 28 Nov 2023 10:36:22 +0800 Subject: [PATCH 10/50] [opt](plan) only lock olap table when query plan #27639 (#27656) bp #27639 --- .../java/org/apache/doris/catalog/OlapTable.java | 13 ++++++++++++- .../main/java/org/apache/doris/catalog/TableIf.java | 9 +++++++++ .../org/apache/doris/nereids/CascadesContext.java | 3 +++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 21d9ee92f3d688..3ac83a3f3d3de9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -2354,6 +2354,17 @@ public boolean isDistributionColumn(String columnName) { @Override public boolean isPartitionColumn(String columnName) { return getPartitionInfo().getPartitionColumns().stream() - .anyMatch(c -> c.getName().equalsIgnoreCase(columnName)); + .anyMatch(c -> c.getName().equalsIgnoreCase(columnName)); + } + + /** + * For olap table, we need to acquire read lock when plan. + * Because we need to make sure the partition's version remain unchanged when plan. + * + * @return + */ + @Override + public boolean needReadLockWhenPlan() { + return true; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 3539d17e269a53..3f53fa1bfaef07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -149,6 +149,15 @@ default int getBaseColumnIdxByName(String colName) { void write(DataOutput out) throws IOException; + /** + * return true if this kind of table need read lock when doing query plan. + * + * @return + */ + default boolean needReadLockWhenPlan() { + return false; + } + /** * Doris table type. 
*/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java index a81e9f008fab0e..39edea95d202ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java @@ -521,6 +521,9 @@ public Lock(LogicalPlan plan, CascadesContext cascadesContext) { cascadesContext.extractTables(plan); } for (TableIf table : cascadesContext.tables.values()) { + if (!table.needReadLockWhenPlan()) { + continue; + } if (!table.tryReadLock(1, TimeUnit.MINUTES)) { close(); throw new RuntimeException(String.format("Failed to get read lock on table: %s", table.getName())); From f1aedd192b037427dc567ee901f2e7e9988b577b Mon Sep 17 00:00:00 2001 From: wangbo Date: Tue, 28 Nov 2023 14:07:18 +0800 Subject: [PATCH 11/50] select coordinator node from user's tag when exec streaming load (#27106) (#27677) --- .../java/org/apache/doris/httpv2/rest/LoadAction.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java index 9bfbbccfa8a6b0..b358ea60b9ae96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/LoadAction.java @@ -27,6 +27,7 @@ import org.apache.doris.httpv2.exception.UnauthorizedException; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; +import org.apache.doris.resource.Tag; import org.apache.doris.service.ExecuteEnv; import org.apache.doris.system.Backend; import org.apache.doris.system.BeSelectionPolicy; @@ -46,6 +47,7 @@ import java.net.URI; import java.util.List; +import java.util.Set; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -227,7 +229,11 @@ private Object executeStreamLoad2PC(HttpServletRequest request, String db) { } private TNetworkAddress selectRedirectBackend(String clusterName) throws LoadException { - BeSelectionPolicy policy = new BeSelectionPolicy.Builder().needLoadAvailable().build(); + String qualifiedUser = ConnectContext.get().getQualifiedUser(); + Set userTags = Env.getCurrentEnv().getAuth().getResourceTags(qualifiedUser); + BeSelectionPolicy policy = new BeSelectionPolicy.Builder() + .addTags(userTags) + .needLoadAvailable().build(); List backendIds = Env.getCurrentSystemInfo().selectBackendIdsByPolicy(policy, 1); if (backendIds.isEmpty()) { throw new LoadException(SystemInfoService.NO_BACKEND_LOAD_AVAILABLE_MSG + ", policy: " + policy); From fa04587af066e7ccbd9e9e557c43a32840e52141 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 28 Nov 2023 19:43:49 +0800 Subject: [PATCH 12/50] [fix](statistics)Need to recalculate health value when table row count become 0 #27673 (#27674) backport #27673 --- .../doris/statistics/AnalysisManager.java | 15 ++++ .../statistics/StatisticsAutoCollector.java | 45 +++++++++-- .../doris/statistics/util/StatisticsUtil.java | 3 +- .../doris/statistics/AnalysisManagerTest.java | 1 - .../StatisticsAutoCollectorTest.java | 74 +++++++++++++++++++ 5 files changed, 128 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 
e987130b53e0ed..9f1db45a28501a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -715,6 +715,21 @@ public void dropStats(DropStatsStmt dropStatsStmt) throws DdlException { StatisticsRepository.dropStatistics(tblId, cols); } + public void dropStats(TableIf table) throws DdlException { + TableStatsMeta tableStats = findTableStatsStatus(table.getId()); + if (tableStats == null) { + return; + } + Set cols = table.getBaseSchema().stream().map(Column::getName).collect(Collectors.toSet()); + for (String col : cols) { + tableStats.removeColumn(col); + Env.getCurrentEnv().getStatisticsCache().invalidate(table.getId(), -1L, col); + } + tableStats.updatedTime = 0; + logCreateTableStats(tableStats); + StatisticsRepository.dropStatistics(table.getId(), cols); + } + public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException { Map analysisTaskMap = analysisJobIdToTaskMap.remove(killAnalysisJobStmt.jobId); if (analysisTaskMap == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 52299bd301c5f1..7ae7651421cd2a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -91,15 +91,21 @@ private void analyzeAll() { public void analyzeDb(DatabaseIf databaseIf) throws DdlException { List analysisInfos = constructAnalysisInfo(databaseIf); for (AnalysisInfo analysisInfo : analysisInfos) { - analysisInfo = getReAnalyzeRequiredPart(analysisInfo); - if (analysisInfo == null) { - continue; - } try { + if (needDropStaleStats(analysisInfo)) { + Env.getCurrentEnv().getAnalysisManager().dropStats(databaseIf.getTable(analysisInfo.tblId).get()); + continue; + } + analysisInfo = getReAnalyzeRequiredPart(analysisInfo); + if (analysisInfo == null) { + continue; + } createSystemAnalysisJob(analysisInfo); - } catch (Exception e) { - analysisInfo.message = e.getMessage(); - throw e; + } catch (Throwable t) { + analysisInfo.message = t.getMessage(); + LOG.warn("Failed to auto analyze table {}.{}, reason {}", + databaseIf.getFullName(), analysisInfo.tblId, analysisInfo.message, t); + continue; + } } } @@ -191,4 +197,29 @@ protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { return new AnalysisInfoBuilder(jobInfo).setColToPartitions(needRunPartitions).build(); } + /** + * Check if the given table should drop its stale stats. A user may truncate the table; + * in this case, we need to drop the stale stats. + * @param jobInfo + * @return True if the stale stats need to be dropped, false otherwise.
+ */ + protected boolean needDropStaleStats(AnalysisInfo jobInfo) { + TableIf table = StatisticsUtil + .findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); + if (!(table instanceof OlapTable)) { + return false; + } + AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); + TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); + if (tblStats == null) { + return false; + } + if (tblStats.analyzeColumns().isEmpty()) { + return false; + } + if (table.getRowCount() == 0) { + return true; + } + return false; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 212e61fa10aac8..8856d059a9a412 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -520,8 +520,7 @@ public static String replaceParams(String template, Map params) * * @param updatedRows The number of rows updated by the table * @param totalRows The current number of rows in the table - * the healthier the statistics of the table - * @return Health, the value range is [0, 100], the larger the value, + * @return Health, the value range is [0, 100], the larger the value, the healthier the statistics of the table. */ public static int getTableHealth(long totalRows, long updatedRows) { if (updatedRows >= totalRows) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index f33a16f9b785b8..c210a60262dfab 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -358,7 +358,6 @@ public List getColumns() { .setColToPartitions(new HashMap<>()).setColName("col1").build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); - } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index d94bdd61248734..14c6f41384f100 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -27,6 +27,7 @@ import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.View; +import org.apache.doris.catalog.external.ExternalTable; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -418,4 +419,77 @@ public TableIf findTable(long catalogId, long dbId, long tblId) { Assertions.assertNotNull(task.getTableSample()); } } + + @Test + public void testNeedDropStaleStats() { + + TableIf olapTable = new OlapTable(); + TableIf otherTable = new ExternalTable(); + + new MockUp() { + @Mock + public TableIf findTable(long catalogId, long dbId, long tblId) { + if (tblId == 0) { + return olapTable; + } else { + return otherTable; + } + } + }; + + new MockUp() { + int count = 0; + + int[] rowCounts = {100, 0}; + @Mock + public long getRowCount() { + return rowCounts[count++]; + } + + @Mock + public List getBaseSchema() { + return Lists.newArrayList(new Column("col1", Type.INT), new 
Column("col2", Type.INT)); + } + }; + + AnalysisInfo analysisInfoOlap = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(0) + .setJobType(JobType.SYSTEM).build(); + + new MockUp() { + int count = 0; + + TableStatsMeta[] tableStatsArr = + new TableStatsMeta[] {null, + new TableStatsMeta(0, analysisInfoOlap, olapTable), + new TableStatsMeta(0, analysisInfoOlap, olapTable)}; + + { + tableStatsArr[1].updatedRows.addAndGet(100); + tableStatsArr[2].updatedRows.addAndGet(0); + } + + + @Mock + public TableStatsMeta findTableStatsStatus(long tblId) { + return tableStatsArr[count++]; + } + }; + + AnalysisInfo analysisInfoOtherTable = new AnalysisInfoBuilder().setAnalysisMethod(AnalysisMethod.FULL) + .setColToPartitions(new HashMap<>()) + .setAnalysisType(AnalysisType.FUNDAMENTALS) + .setColName("col1") + .setTblId(1) + .setJobType(JobType.SYSTEM).build(); + + StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOtherTable)); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + Assertions.assertFalse(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + Assertions.assertTrue(statisticsAutoCollector.needDropStaleStats(analysisInfoOlap)); + } } From fa5baebe35071eac1023658c8ca55c953f1a27b5 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 28 Nov 2023 21:39:35 +0800 Subject: [PATCH 13/50] [fix](statistics)Fix sample min max npe bug #27702 (#27707) backport #27702 --- .../doris/statistics/OlapAnalysisTask.java | 6 ++-- .../statistics/StatisticsRepository.java | 8 ++--- .../doris/statistics/util/StatisticsUtil.java | 17 ++++++++++ .../statistics/util/StatisticsUtilTest.java | 31 +++++++++++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 97cb10c520c7d8..04a763b688440a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -31,10 +31,8 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.text.StringSubstitutor; -import java.nio.charset.StandardCharsets; import java.security.SecureRandom; import java.util.ArrayList; -import java.util.Base64; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -94,8 +92,8 @@ protected void doSample() throws Exception { // Get basic stats, including min and max. 
ResultRow basicStats = collectBasicStat(r); long rowCount = tbl.getRowCount(); - String min = Base64.getEncoder().encodeToString(basicStats.get(0).getBytes(StandardCharsets.UTF_8)); - String max = Base64.getEncoder().encodeToString(basicStats.get(1).getBytes(StandardCharsets.UTF_8)); + String min = StatisticsUtil.encodeValue(basicStats, 0); + String max = StatisticsUtil.encodeValue(basicStats, 1); boolean limitFlag = false; long rowsToSample = pair.second; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 2b14d588308034..12ca6b4aa1f938 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -35,8 +35,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -271,10 +269,8 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt params.put("count", String.valueOf(columnStatistic.count)); params.put("ndv", String.valueOf(columnStatistic.ndv)); params.put("nullCount", String.valueOf(columnStatistic.numNulls)); - params.put("min", min == null ? "NULL" : - Base64.getEncoder().encodeToString(min.getBytes(StandardCharsets.UTF_8))); - params.put("max", max == null ? "NULL" : - Base64.getEncoder().encodeToString(max.getBytes(StandardCharsets.UTF_8))); + params.put("min", StatisticsUtil.encodeString(min)); + params.put("max", StatisticsUtil.encodeString(max)); params.put("dataSize", String.valueOf(columnStatistic.dataSize)); if (partitionIds.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 8856d059a9a412..660cb874e9b885 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -90,12 +90,14 @@ import org.apache.logging.log4j.Logger; import java.net.InetSocketAddress; +import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.time.LocalTime; import java.time.format.DateTimeFormatter; import java.time.format.DateTimeParseException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collection; import java.util.Collections; import java.util.Date; @@ -935,4 +937,19 @@ public static int getAnalyzeTimeout() { return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC; } + public static String encodeValue(ResultRow row, int index) { + if (row == null || row.getValues().size() <= index) { + return "NULL"; + } + return encodeString(row.get(index)); + } + + public static String encodeString(String value) { + if (value == null) { + return "NULL"; + } else { + return Base64.getEncoder().encodeToString(value.getBytes(StandardCharsets.UTF_8)); + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 107a5f53822fe5..2c0854dcf2125b 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -20,14 +20,19 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.qe.SessionVariable; +import org.apache.doris.statistics.ResultRow; +import com.google.common.collect.Lists; import mockit.Mock; import mockit.MockUp; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.nio.charset.StandardCharsets; import java.time.LocalTime; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Base64; public class StatisticsUtilTest { @Test @@ -111,4 +116,30 @@ protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws now = "23:30:00"; Assertions.assertFalse(StatisticsUtil.inAnalyzeTime(LocalTime.parse(now, timeFormatter))); } + + + @Test + public void testEncodeValue() throws Exception { + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(null, 0)); + + ResultRow row = new ResultRow(null); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + + ArrayList values = Lists.newArrayList(); + values.add("a"); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 1)); + + values = Lists.newArrayList(); + values.add(null); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + + values.add("a"); + row = new ResultRow(values); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 0)); + Assertions.assertEquals(Base64.getEncoder() + .encodeToString("a".getBytes(StandardCharsets.UTF_8)), StatisticsUtil.encodeValue(row, 1)); + Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 2)); + } } From d791420b15da0124b3b26d3a9a1401761801f546 Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 28 Nov 2023 22:04:17 +0800 Subject: [PATCH 14/50] [Bug](join) try fix wrong _has_null_in_build_side setted (#27684) (#27710) --- be/src/vec/exec/join/vhash_join_node.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index c8bf5c9b36d74d..5a92fc674cfd51 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -910,11 +910,6 @@ Status HashJoinNode::sink(doris::RuntimeState* state, vectorized::Block* in_bloc // make one block for each 4 gigabytes constexpr static auto BUILD_BLOCK_MAX_SIZE = 4 * 1024UL * 1024UL * 1024UL; - if (_has_null_in_build_side) { - // TODO: if _has_null_in_build_side is true we should finish current pipeline task. - DCHECK(state->enable_pipeline_exec()); - return Status::OK(); - } if (_should_build_hash_table) { // If eos or have already met a null value using short-circuit strategy, we do not need to pull // data from probe side. 
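A note for context on the flag this patch touches: in a null-aware anti join (the plan shape behind NOT IN), a single NULL on the build side means the join predicate can never evaluate to TRUE for any probe row, which is why the surrounding code can use a short-circuit strategy and skip the probe side. The sketch below illustrates only that semantic rule; it is a self-contained plain-Java sketch with hypothetical names, not the Doris BE implementation shown in the C++ hunk above.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    // Hypothetical sketch (not Doris code) of the semantics behind a flag
    // like _has_null_in_build_side: "x NOT IN (build)" is never TRUE when
    // the build side contains a NULL, so an executor may stop probing as
    // soon as that is known. Probe values are assumed non-null for brevity.
    public class NullAwareAntiJoinSketch {

        static List<Integer> notInAntiJoin(List<Integer> probe, List<Integer> build) {
            boolean hasNullInBuildSide = build.stream().anyMatch(v -> v == null);
            if (hasNullInBuildSide) {
                return List.of(); // short circuit: no probe row can qualify
            }
            List<Integer> result = new ArrayList<>();
            for (Integer x : probe) {
                if (!build.contains(x)) {
                    result.add(x);
                }
            }
            return result;
        }

        public static void main(String[] args) {
            List<Integer> probe = Arrays.asList(1, 2, 3);
            System.out.println(notInAntiJoin(probe, Arrays.asList(2)));       // prints [1, 3]
            System.out.println(notInAntiJoin(probe, Arrays.asList(2, null))); // prints []
        }
    }

The hunk above does not drop that optimization; it removes an early return in sink() whose DCHECK assumed pipeline execution and which, per the commit title, could act on a wrongly set flag.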
From f7b8021066bab5cc4cbb806bc6fddc2db422000d Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Tue, 28 Nov 2023 23:29:28 +0800 Subject: [PATCH 15/50] [Fix](show-load)Show load npe(userinfo is null) (#27698) (#27719) --- .../main/java/org/apache/doris/load/loadv2/LoadJob.java | 8 ++++++-- .../java/org/apache/doris/load/loadv2/LoadManager.java | 5 +++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java index 2c70883d8f1dbe..2e3b0e6e868ec6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java @@ -819,8 +819,12 @@ public List getShowInfo() throws DdlException { jobInfo.add(transactionId); // error tablets jobInfo.add(errorTabletsToJson()); - // user - jobInfo.add(userInfo.getQualifiedUser()); + // user, some load job may not have user info + if (userInfo == null || userInfo.getQualifiedUser() == null) { + jobInfo.add(FeConstants.null_string); + } else { + jobInfo.add(userInfo.getQualifiedUser()); + } // comment jobInfo.add(comment); return jobInfo; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java index 3f05e2c5a9887a..dc02a0cf1451bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadManager.java @@ -568,8 +568,9 @@ public List> getLoadJobInfosByDb(long dbId, String labelValue, } // add load job info loadJobInfos.add(loadJob.getShowInfo()); - } catch (DdlException e) { - continue; + } catch (RuntimeException | DdlException e) { + // ignore this load job + LOG.warn("get load job info failed. 
job id: {}", loadJob.getId(), e); } } return loadJobInfos; From e44f57403febb164bc7eb83707897de6dcff866a Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 29 Nov 2023 02:58:23 +0800 Subject: [PATCH 16/50] [pick](nereids)temporary partition is always pruned #27636 (#27722) --- .../java/org/apache/doris/catalog/PartitionInfo.java | 10 ++++++++++ .../rules/expression/rules/PartitionPruner.java | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java index 6bd4604471a58b..a8033b788f1cad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionInfo.java @@ -116,6 +116,16 @@ public Map getIdToItem(boolean isTemp) { } } + /** + * @return both normal partition and temp partition + */ + public Map getAllPartitions() { + HashMap all = new HashMap<>(); + all.putAll(idToTempItem); + all.putAll(idToItem); + return all; + } + public PartitionItem getItem(long partitionId) { PartitionItem item = idToItem.get(partitionId); if (item == null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index 6c932f558b021d..a033629547619d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -103,7 +103,7 @@ public List prune() { */ public static List prune(List partitionSlots, Expression partitionPredicate, PartitionInfo partitionInfo, CascadesContext cascadesContext, PartitionTableType partitionTableType) { - return prune(partitionSlots, partitionPredicate, partitionInfo.getIdToItem(false), cascadesContext, + return prune(partitionSlots, partitionPredicate, partitionInfo.getAllPartitions(), cascadesContext, partitionTableType); } From 7b17f241eefe2712a4521481133daf332d35bb9f Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Wed, 29 Nov 2023 02:59:40 +0800 Subject: [PATCH 17/50] [enhancement](stats) limit bq cap size for analyze task #27685 (#27687) --- .../java/org/apache/doris/statistics/AnalysisManager.java | 3 ++- .../org/apache/doris/statistics/AnalysisTaskExecutor.java | 6 +++++- .../org/apache/doris/statistics/StatisticConstants.java | 2 +- .../apache/doris/statistics/StatisticsAutoCollector.java | 3 ++- .../org/apache/doris/statistics/StatisticsCollector.java | 6 ------ 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 9f1db45a28501a..159461f463ed4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -240,7 +240,8 @@ public class AnalysisManager implements Writable { public AnalysisManager() { if (!Env.isCheckpointThread()) { - this.taskExecutor = new AnalysisTaskExecutor(Config.statistics_simultaneously_running_task_num); + this.taskExecutor = new AnalysisTaskExecutor(Config.statistics_simultaneously_running_task_num, + Integer.MAX_VALUE); this.statisticsCache = new StatisticsCache(); } } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java index fb4530837e4afd..3bdccaca047954 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisTaskExecutor.java @@ -43,11 +43,15 @@ public class AnalysisTaskExecutor { Comparator.comparingLong(AnalysisTaskWrapper::getStartTime)); public AnalysisTaskExecutor(int simultaneouslyRunningTaskNum) { + this(simultaneouslyRunningTaskNum, Integer.MAX_VALUE); + } + + public AnalysisTaskExecutor(int simultaneouslyRunningTaskNum, int taskQueueSize) { if (!Env.isCheckpointThread()) { executors = ThreadPoolManager.newDaemonThreadPool( simultaneouslyRunningTaskNum, simultaneouslyRunningTaskNum, 0, - TimeUnit.DAYS, new LinkedBlockingQueue<>(), + TimeUnit.DAYS, new LinkedBlockingQueue<>(taskQueueSize), new BlockedPolicy("Analysis Job Executor", Integer.MAX_VALUE), "Analysis Job Executor", true); cancelExpiredTask(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 9f1bd3bf681896..ffb80074eee7b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -96,7 +96,7 @@ public class StatisticConstants { public static final int ANALYZE_TIMEOUT_IN_SEC = 43200; - public static final int SUBMIT_JOB_LIMIT = 5; + public static final int TASK_QUEUE_CAP = 10; static { SYSTEM_DBS.add(SystemInfoService.DEFAULT_CLUSTER diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 7ae7651421cd2a..2eac82c91b633e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -52,7 +52,8 @@ public class StatisticsAutoCollector extends StatisticsCollector { public StatisticsAutoCollector() { super("Automatic Analyzer", TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes), - new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num)); + new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num, + StatisticConstants.TASK_QUEUE_CAP)); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java index 63dcdab09ab4d2..9d4c311523b59c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCollector.java @@ -35,8 +35,6 @@ public abstract class StatisticsCollector extends MasterDaemon { protected final AnalysisTaskExecutor analysisTaskExecutor; - protected int submittedThisRound = StatisticConstants.SUBMIT_JOB_LIMIT; - public StatisticsCollector(String name, long intervalMs, AnalysisTaskExecutor analysisTaskExecutor) { super(name, intervalMs); this.analysisTaskExecutor = analysisTaskExecutor; @@ -54,7 +52,6 @@ protected void runAfterCatalogReady() { if (Env.isCheckpointThread()) { return; } - submittedThisRound = StatisticConstants.SUBMIT_JOB_LIMIT; if (Env.getCurrentEnv().getAnalysisManager().hasUnFinished()) { 
LOG.info("Analyze tasks those submitted in last time is not finished, skip"); return; @@ -72,9 +69,6 @@ protected void createSystemAnalysisJob(AnalysisInfo jobInfo) // No statistics need to be collected or updated return; } - if (submittedThisRound-- < 0) { - return; - } Map analysisTasks = new HashMap<>(); AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); analysisManager.createTaskForEachColumns(jobInfo, analysisTasks, false); From 008b95ca53bd33c8306b421776fca6761e9c17e0 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 29 Nov 2023 03:01:25 +0800 Subject: [PATCH 18/50] [improvement](statistics) Add config for the threshold of column count for auto analyze #27713 (#27723) --- .../org/apache/doris/qe/SessionVariable.java | 9 ++++++ .../doris/statistics/StatisticConstants.java | 2 ++ .../statistics/StatisticsAutoCollector.java | 6 +++- .../doris/statistics/util/StatisticsUtil.java | 10 ++++++ .../StatisticsAutoCollectorTest.java | 31 +++++++++++++++++++ 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index f5f139a66d8658..4923d29db1a25d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -415,6 +415,8 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_AUTO_ANALYZE = "enable_auto_analyze"; + public static final String AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = "auto_analyze_table_width_threshold"; + public static final String FASTER_FLOAT_CONVERT = "faster_float_convert"; public static final String ENABLE_DECIMAL256 = "enable_decimal256"; @@ -1204,6 +1206,13 @@ public void setMaxJoinNumberOfReorder(int maxJoinNumberOfReorder) { flag = VariableMgr.GLOBAL) public boolean enableAutoAnalyze = true; + @VariableMgr.VarAttr(name = AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD, + description = {"参与自动收集的最大表宽度,列数多于这个参数的表不参与自动收集", + "Maximum table width to enable auto analyze, " + + "table with more columns than this value will not be auto analyzed."}, + flag = VariableMgr.GLOBAL) + public int autoAnalyzeTableWidthThreshold = 70; + @VariableMgr.VarAttr(name = AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", description = {"该参数定义自动ANALYZE例程的开始时间", "This parameter defines the start time for the automatic ANALYZE routine."}, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index ffb80074eee7b0..3e2b9c8bc287d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -98,6 +98,8 @@ public class StatisticConstants { public static final int TASK_QUEUE_CAP = 10; + public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 70; + static { SYSTEM_DBS.add(SystemInfoService.DEFAULT_CLUSTER + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index 2eac82c91b633e..bcc7f53f4d118c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -182,9 +182,13 @@ protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { TableIf table = StatisticsUtil .findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); + // Skip tables that are too wide. + if (table.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { + return null; + } + AnalysisManager analysisManager = Env.getServingEnv().getAnalysisManager(); TableStatsMeta tblStats = analysisManager.findTableStatsStatus(table.getId()); - if (!table.needReAnalyzeTable(tblStats)) { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 660cb874e9b885..66b13ff87f1be8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -937,6 +937,16 @@ public static int getAnalyzeTimeout() { return StatisticConstants.ANALYZE_TIMEOUT_IN_SEC; } + public static int getAutoAnalyzeTableWidthThreshold() { + try { + return findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD) + .autoAnalyzeTableWidthThreshold; + } catch (Exception e) { + LOG.warn("Failed to get value of auto_analyze_table_width_threshold, return default", e); + } + return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD; + } + public static String encodeValue(ResultRow row, int index) { if (row == null || row.getValues().size() <= index) { return "NULL"; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 14c6f41384f100..732196ef31b861 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -221,6 +221,37 @@ public AnalysisInfo getAnalysisJobInfo(AnalysisInfo jobInfo, TableIf table, Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo2)); } + @Test + public void testSkipWideTable() { + + TableIf tableIf = new OlapTable(); + + new MockUp() { + @Mock + public List getBaseSchema() { + return Lists.newArrayList(new Column("col1", Type.INT), new Column("col2", Type.INT)); + } + }; + + new MockUp() { + int count = 0; + int [] thresholds = {1, 10}; + @Mock + public TableIf findTable(long catalogName, long dbName, long tblName) { + return tableIf; + } + + @Mock + public int getAutoAnalyzeTableWidthThreshold() { + return thresholds[count++]; + } + }; + AnalysisInfo analysisInfo = new AnalysisInfoBuilder().build(); + StatisticsAutoCollector statisticsAutoCollector = new StatisticsAutoCollector(); + Assertions.assertNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo)); + Assertions.assertNotNull(statisticsAutoCollector.getReAnalyzeRequiredPart(analysisInfo)); + } + @Test public void testLoop() { AtomicBoolean timeChecked = new AtomicBoolean(); From 8fd4aa2083b54a50911f132b6b68981f00a77db5 Mon Sep 17 00:00:00 2001 From: catpineapple <42031973+catpineapple@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:51:37 +0800 Subject: [PATCH 19/50] [doc](fix) k8s operator docs fix to 2.0 (#27476) --- docs/en/docs/install/k8s-deploy.md | 2 +-
docs/zh-CN/docs/install/k8s-deploy.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/docs/install/k8s-deploy.md b/docs/en/docs/install/k8s-deploy.md index 10b845a64db977..659fcd0d544891 100644 --- a/docs/en/docs/install/k8s-deploy.md +++ b/docs/en/docs/install/k8s-deploy.md @@ -77,7 +77,7 @@ User can directly deploy Doris by [examples](https://github.com/selectdb/doris-o ```shell kubectl apply -f https://raw.githubusercontent.com/selectdb/doris-operator/master/doc/examples/doriscluster-sample.yaml ``` -Or download [doriscluster-sample](https://github.com/selectdb/doris-operator/master/doc/examples/doriscluster-sample.yaml) a custom resource that tells the Operator how to configure the Kubernetes cluster, and custom resource as [api.md](https://github.com/selectdb/doris-operator/blob/master/doc/api.md) and +Or download [doriscluster-sample](https://github.com/selectdb/doris-operator/tree/master/doc/examples/doriscluster-sample.yaml) a custom resource that tells the Operator how to configure the Kubernetes cluster, and custom resource as [api.md](https://github.com/selectdb/doris-operator/blob/master/doc/api.md) and [how_to_use](https://github.com/selectdb/doris-operator/tree/master/doc/how_to_use.md) docs. Instead of using the command below, apply the customized resource. ```shell kubeectl apply -f doriscluster-sample.yaml diff --git a/docs/zh-CN/docs/install/k8s-deploy.md b/docs/zh-CN/docs/install/k8s-deploy.md index 180322f613b270..36093d758d6553 100644 --- a/docs/zh-CN/docs/install/k8s-deploy.md +++ b/docs/zh-CN/docs/install/k8s-deploy.md @@ -70,11 +70,11 @@ Operator 服务部署后,可通过如下命令查看服务的状态。当`STAT operator.yaml 中 namespace 默认为 Doris,如果更改了 namespace,在查询服务状态的时候请替换正确的 namespace 名称。 ### 部署 Doris 集群 **1. 部署集群** -`Doris-Operator`仓库的 [doc/examples ](https://github.com/selectdb/doris-operator/tree/master/doc/examples)目录提供众多场景的使用范例,可直接使用范例进行部署。以最基础的范例为例: +`Doris-Operator`仓库的 [doc/examples](https://github.com/selectdb/doris-operator/tree/master/doc/examples) 目录提供众多场景的使用范例,可直接使用范例进行部署。以最基础的范例为例: ``` kubectl apply -f https://raw.githubusercontent.com/selectdb/doris-operator/master/doc/examples/doriscluster-sample.yaml ``` -在 Doris-Operator 仓库中,[how_to_use.md](https://github.com/selectdb/doris-operator/tree/master/doc/how_to_use.md) 梳理了 Operator 管理运维 Doris 集群的主要能力,[DorisCluster](https://github.com/selectdb/doris-operator/blob/master/api/doris/v1/types.go) 展示了资源定义和从属结构,[api.md](https://github.com/selectdb/doris-operator/tree/master/doc/api.md) 可读性展示了资源定义和从属结构。可根据相关文档规划部署 Doris 集群。 +在 Doris-Operator 仓库中,[how_to_use.md](https://github.com/selectdb/doris-operator/tree/master/doc/how_to_use_cn.md) 梳理了 Operator 管理运维 Doris 集群的主要能力,[DorisCluster](https://github.com/selectdb/doris-operator/blob/master/api/doris/v1/types.go) 展示了资源定义和从属结构,[api.md](https://github.com/selectdb/doris-operator/tree/master/doc/api.md) 可读性展示了资源定义和从属结构。可根据相关文档规划部署 Doris 集群。 **2. 
检测集群状态** - 检查所有 pod 的状态 @@ -118,5 +118,5 @@ Doris 集群部署默认不提供 K8S 外部访问,如果集群需要被集群 mysql -h a7509284bf3784983a596c6eec7fc212-618xxxxxx.com -uroot -P9030 ``` ### 后记 -本文简述 Doris 在 Kubernetes 的部署使用,Doris-Operator 提供的其他能力请参看[主要能力介绍](https://github.com/selectdb/doris-operator/tree/master/doc/how_to_use.md),DorisCluster 资源的 [api](https://github.com/selectdb/doris-operator/blob/master/doc/api.md) 可读性文档定制化部署 Doris 集群。 +本文简述 Doris 在 Kubernetes 的部署使用,Doris-Operator 提供的其他能力请参看[主要能力介绍](https://github.com/selectdb/doris-operator/tree/master/doc/how_to_use_cn.md),DorisCluster 资源的 [api](https://github.com/selectdb/doris-operator/blob/master/doc/api.md) 可读性文档定制化部署 Doris 集群。 From dd20b152eba1ac5bd8ab9457799aa7a085fc8b30 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 29 Nov 2023 13:59:40 +0800 Subject: [PATCH 20/50] [Improvement](planner)support select tablets with nereids optimize #23164 #23365 (#27740) #23164 #23365 --- .../org/apache/doris/nereids/DorisParser.g4 | 7 +- .../nereids/analyzer/UnboundRelation.java | 23 ++-- .../nereids/parser/LogicalPlanBuilder.java | 9 +- .../nereids/rules/analysis/BindRelation.java | 5 +- .../rules/rewrite/PruneOlapScanTablet.java | 20 ++-- .../trees/copier/LogicalPlanDeepCopier.java | 6 +- .../trees/plans/logical/LogicalOlapScan.java | 8 +- .../select_tablets/select_with_tablets.out | 58 ++++++++++ .../select_tablets/select_with_tablets.groovy | 104 ++++++++++++++++++ 9 files changed, 213 insertions(+), 27 deletions(-) create mode 100644 regression-test/data/nereids_p0/select_tablets/select_with_tablets.out create mode 100644 regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 4ae7f668f27f10..8c9ece2799f7e0 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -259,7 +259,8 @@ identifierSeq ; relationPrimary - : multipartIdentifier specifiedPartition? tableAlias sample? relationHint? lateralView* #tableName + : multipartIdentifier specifiedPartition? + tabletList? tableAlias sample? relationHint? lateralView* #tableName | LEFT_PAREN query RIGHT_PAREN tableAlias lateralView* #aliasedQuery | tvfName=identifier LEFT_PAREN (properties+=property (COMMA properties+=property)*)? @@ -280,6 +281,10 @@ multipartIdentifier : parts+=errorCapturingIdentifier (DOT parts+=errorCapturingIdentifier)* ; +tabletList + : TABLET LEFT_PAREN tabletIdList+=INTEGER_VALUE (COMMA tabletIdList+=INTEGER_VALUE)* RIGHT_PAREN + ; + // -----------------Expression----------------- namedExpression : expression (AS? (identifierOrText))? 
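Since the tabletList rule added above only admits INTEGER_VALUE tokens, the planner side of this patch (the visitTableName() hunk in LogicalPlanBuilder.java further below) simply collects the matched tokens into longs, and an absent clause yields an empty list that downstream code treats as no tablet restriction. As a standalone illustration of that shape, a plain-Java sketch with hypothetical names rather than the actual Doris classes:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    // Hypothetical sketch (not Doris code): turning the tokens matched by
    // TABLET(id1, id2, ...) into tablet ids. The grammar guarantees every
    // token is an integer literal, so Long.parseLong cannot throw here.
    public class TabletListSketch {

        static List<Long> parseTabletIds(List<String> tabletTokens) {
            if (tabletTokens == null) {
                return Collections.emptyList(); // no TABLET(...) clause: unrestricted scan
            }
            List<Long> tabletIds = new ArrayList<>();
            for (String token : tabletTokens) {
                tabletIds.add(Long.parseLong(token));
            }
            return tabletIds;
        }

        public static void main(String[] args) {
            // e.g. SELECT * FROM t TABLET(10042, 10043)
            System.out.println(parseTabletIds(List.of("10042", "10043"))); // [10042, 10043]
            System.out.println(parseTabletIds(null));                      // []
        }
    }

Consistent with that, the PruneOlapScanTablet change later in this patch treats a non-empty user-specified tablet list as final and skips conjunct-based tablet pruning when one is present.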
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java index 30485fe66f8d29..dd0033e9e5d010 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/analyzer/UnboundRelation.java @@ -47,35 +47,37 @@ public class UnboundRelation extends LogicalRelation implements Unbound { private final List nameParts; private final List partNames; + private final List tabletIds; private final boolean isTempPart; private final List hints; private final Optional tableSample; public UnboundRelation(RelationId id, List nameParts) { - this(id, nameParts, Optional.empty(), Optional.empty(), ImmutableList.of(), false, + this(id, nameParts, Optional.empty(), Optional.empty(), ImmutableList.of(), false, ImmutableList.of(), ImmutableList.of(), Optional.empty()); } public UnboundRelation(RelationId id, List nameParts, List partNames, boolean isTempPart) { - this(id, nameParts, Optional.empty(), Optional.empty(), partNames, isTempPart, + this(id, nameParts, Optional.empty(), Optional.empty(), partNames, isTempPart, ImmutableList.of(), ImmutableList.of(), Optional.empty()); } public UnboundRelation(RelationId id, List nameParts, List partNames, boolean isTempPart, - List hints, Optional tableSample) { + List tabletIds, List hints, Optional tableSample) { this(id, nameParts, Optional.empty(), Optional.empty(), - partNames, isTempPart, hints, tableSample); + partNames, isTempPart, tabletIds, hints, tableSample); } /** - * Constructor. + * constructor of UnboundRelation */ public UnboundRelation(RelationId id, List nameParts, Optional groupExpression, Optional logicalProperties, List partNames, boolean isTempPart, - List hints, Optional tableSample) { + List tabletIds, List hints, Optional tableSample) { super(id, PlanType.LOGICAL_UNBOUND_RELATION, groupExpression, logicalProperties); this.nameParts = ImmutableList.copyOf(Objects.requireNonNull(nameParts, "nameParts should not null")); this.partNames = ImmutableList.copyOf(Objects.requireNonNull(partNames, "partNames should not null")); + this.tabletIds = ImmutableList.copyOf(Objects.requireNonNull(tabletIds, "tabletIds should not null")); this.isTempPart = isTempPart; this.hints = ImmutableList.copyOf(Objects.requireNonNull(hints, "hints should not be null.")); this.tableSample = tableSample; @@ -99,15 +101,14 @@ public LogicalProperties computeLogicalProperties() { public Plan withGroupExpression(Optional groupExpression) { return new UnboundRelation(relationId, nameParts, groupExpression, Optional.of(getLogicalProperties()), - partNames, isTempPart, hints, tableSample); - + partNames, isTempPart, tabletIds, hints, tableSample); } @Override public Plan withGroupExprLogicalPropChildren(Optional groupExpression, Optional logicalProperties, List children) { return new UnboundRelation(relationId, nameParts, groupExpression, logicalProperties, partNames, - isTempPart, hints, tableSample); + isTempPart, tabletIds, hints, tableSample); } @Override @@ -146,6 +147,10 @@ public boolean isTempPart() { return isTempPart; } + public List getTabletIds() { + return tabletIds; + } + public List getHints() { return hints; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 19a2db2f0d547d..38ebb65c3541f1 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -630,6 +630,13 @@ public LogicalPlan visitTableName(TableNameContext ctx) { } } + List tabletIdLists = new ArrayList<>(); + if (ctx.tabletList() != null) { + ctx.tabletList().tabletIdList.stream().forEach(tabletToken -> { + tabletIdLists.add(Long.parseLong(tabletToken.getText())); + }); + } + final List relationHints; if (ctx.relationHint() != null) { relationHints = typedVisit(ctx.relationHint()); @@ -640,7 +647,7 @@ public LogicalPlan visitTableName(TableNameContext ctx) { TableSample tableSample = ctx.sample() == null ? null : (TableSample) visit(ctx.sample()); LogicalPlan checkedRelation = withCheckPolicy( new UnboundRelation(StatementScopeIdGenerator.newRelationId(), - tableId, partitionNames, isTempPart, relationHints, + tableId, partitionNames, isTempPart, tabletIdLists, relationHints, Optional.ofNullable(tableSample))); LogicalPlan plan = withTableAlias(checkedRelation, ctx.tableAlias()); for (LateralViewContext lateralViewContext : ctx.lateralView()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index e23c3faa24398b..badefd91b3c60e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -167,13 +167,14 @@ private LogicalPlan bind(CascadesContext cascadesContext, UnboundRelation unboun private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List tableQualifier) { LogicalOlapScan scan; List partIds = getPartitionIds(table, unboundRelation); + List tabletIds = unboundRelation.getTabletIds(); if (!CollectionUtils.isEmpty(partIds)) { scan = new LogicalOlapScan(unboundRelation.getRelationId(), (OlapTable) table, ImmutableList.of(tableQualifier.get(1)), partIds, - unboundRelation.getHints(), unboundRelation.getTableSample()); + tabletIds, unboundRelation.getHints(), unboundRelation.getTableSample()); } else { scan = new LogicalOlapScan(unboundRelation.getRelationId(), - (OlapTable) table, ImmutableList.of(tableQualifier.get(1)), unboundRelation.getHints(), + (OlapTable) table, ImmutableList.of(tableQualifier.get(1)), tabletIds, unboundRelation.getHints(), unboundRelation.getTableSample()); } if (!Util.showHiddenColumns() && scan.getTable().hasDeleteSign() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java index 1b26f6b3f8bec1..0b079d781687ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanTablet.java @@ -55,14 +55,18 @@ public Rule build() { LogicalOlapScan olapScan = filter.child(); OlapTable table = olapScan.getTable(); Builder selectedTabletIdsBuilder = ImmutableList.builder(); - for (Long id : olapScan.getSelectedPartitionIds()) { - Partition partition = table.getPartition(id); - MaterializedIndex index = partition.getIndex(olapScan.getSelectedIndexId()); - selectedTabletIdsBuilder - .addAll(getSelectedTabletIds(filter.getConjuncts(), index, - olapScan.getSelectedIndexId() == olapScan.getTable() - .getBaseIndexId(), - partition.getDistributionInfo())); 
+ if (olapScan.getSelectedTabletIds().isEmpty()) { + for (Long id : olapScan.getSelectedPartitionIds()) { + Partition partition = table.getPartition(id); + MaterializedIndex index = partition.getIndex(olapScan.getSelectedIndexId()); + selectedTabletIdsBuilder + .addAll(getSelectedTabletIds(filter.getConjuncts(), index, + olapScan.getSelectedIndexId() == olapScan.getTable() + .getBaseIndexId(), + partition.getDistributionInfo())); + } + } else { + selectedTabletIdsBuilder.addAll(olapScan.getSelectedTabletIds()); } List selectedTabletIds = selectedTabletIdsBuilder.build(); if (new HashSet(selectedTabletIds).equals(new HashSet(olapScan.getSelectedTabletIds()))) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java index 9c05bae461d478..45aa14f6e46408 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/copier/LogicalPlanDeepCopier.java @@ -160,11 +160,13 @@ public Plan visitLogicalOlapScan(LogicalOlapScan olapScan, DeepCopierContext con LogicalOlapScan newOlapScan; if (olapScan.getManuallySpecifiedPartitions().isEmpty()) { newOlapScan = new LogicalOlapScan(StatementScopeIdGenerator.newRelationId(), - olapScan.getTable(), olapScan.getQualifier(), olapScan.getHints(), olapScan.getTableSample()); + olapScan.getTable(), olapScan.getQualifier(), olapScan.getSelectedTabletIds(), + olapScan.getHints(), olapScan.getTableSample()); } else { newOlapScan = new LogicalOlapScan(StatementScopeIdGenerator.newRelationId(), olapScan.getTable(), olapScan.getQualifier(), - olapScan.getManuallySpecifiedPartitions(), olapScan.getHints(), olapScan.getTableSample()); + olapScan.getManuallySpecifiedPartitions(), olapScan.getSelectedTabletIds(), + olapScan.getHints(), olapScan.getTableSample()); } newOlapScan.getOutput(); context.putRelation(olapScan.getRelationId(), newOlapScan); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java index 21680d4e905998..df8cefe350a5cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java @@ -118,19 +118,19 @@ public LogicalOlapScan(RelationId id, OlapTable table, List qualifier) { Maps.newHashMap(), Optional.empty()); } - public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, + public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, List tabletIds, List hints, Optional tableSample) { this(id, table, qualifier, Optional.empty(), Optional.empty(), - table.getPartitionIds(), false, ImmutableList.of(), + table.getPartitionIds(), false, tabletIds, -1, false, PreAggStatus.on(), ImmutableList.of(), hints, Maps.newHashMap(), tableSample); } public LogicalOlapScan(RelationId id, OlapTable table, List qualifier, List specifiedPartitions, - List hints, Optional tableSample) { + List tabletIds, List hints, Optional tableSample) { this(id, table, qualifier, Optional.empty(), Optional.empty(), // must use specifiedPartitions here for prune partition by sql like 'select * from t partition p1' - specifiedPartitions, false, ImmutableList.of(), + specifiedPartitions, false, tabletIds, -1, false, PreAggStatus.on(), specifiedPartitions, 
hints, Maps.newHashMap(), tableSample); } diff --git a/regression-test/data/nereids_p0/select_tablets/select_with_tablets.out b/regression-test/data/nereids_p0/select_tablets/select_with_tablets.out new file mode 100644 index 00000000000000..20fa99e3420c31 --- /dev/null +++ b/regression-test/data/nereids_p0/select_tablets/select_with_tablets.out @@ -0,0 +1,58 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- +1 doris 19 +2 nereids 18 + +-- !select2 -- +1 doris 19 +2 nereids 18 + +-- !select3 -- +1 doris 19 +2 nereids 18 + +-- !select4 -- + +-- !select5 -- +1 doris 19 + +-- !select6 -- +2 nereids 18 + +-- !select7 -- +1 doris 19 + +-- !select8 -- +1 doris 19 + +-- !select9 -- + +-- !select10 -- +1 doris 19 + +-- !select11 -- + +-- !select12 -- + +-- !select13 -- + +-- !no_partition_1 -- +1 doris 19 +2 nereids 18 + +-- !no_partition_2 -- +2 nereids 18 + +-- !no_partition_3 -- +1 doris 19 + +-- !no_partition_4 -- + +-- !no_partition_5 -- +2 nereids 18 + +-- !no_partition_6 -- +2 nereids 18 + +-- !no_partition_7 -- + diff --git a/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy b/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy new file mode 100644 index 00000000000000..f2c2546b0241d2 --- /dev/null +++ b/regression-test/suites/nereids_p0/select_tablets/select_with_tablets.groovy @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("select_with_tablets") { + sql 'set enable_nereids_planner=true;' + sql 'set enable_fallback_to_original_planner=false;' + + + def table_name1 = "test_table" + sql """ DROP TABLE IF EXISTS ${table_name1} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name1} ( + `id` int(11) NULL, + `name` string NULL, + `age` int(11) NULL + ) + PARTITION BY RANGE(id) + ( + PARTITION less_than_20 VALUES LESS THAN ("20"), + PARTITION between_20_70 VALUES [("20"),("70")), + PARTITION more_than_70 VALUES LESS THAN ("151") + ) + DISTRIBUTED BY HASH(id) BUCKETS 1 + PROPERTIES("replication_num" = "1"); + """ + + sql """ INSERT INTO ${table_name1} VALUES (1, 'doris', 19), (2, 'nereids', 18) """ + def insert_res = sql "show last insert;" + + logger.info("insert result: " + insert_res.toString()) + order_qt_select1 """ SELECT * FROM ${table_name1} """ + + def res = sql """ show tablets from ${table_name1} where version = 2 """ + assertTrue(res.size() == 1) + assertTrue(res[0].size() == 21) + assertEquals("2", res[0][4]) + + order_qt_select2 """ SELECT * FROM ${table_name1} TABLET(${res[0][0]}) """ + order_qt_select3 """ SELECT * FROM ${table_name1} PARTITION less_than_20 TABLET(${res[0][0]}) """ + // result should be empty because TABLET(${res[0][0]}) is not belonged to partition between_20_70. 
+ order_qt_select4 """ SELECT * FROM ${table_name1} PARTITION between_20_70 TABLET(${res[0][0]}) """ + + order_qt_select5 """ SELECT * FROM ${table_name1} where id < 2 """ + order_qt_select6 """ SELECT * FROM ${table_name1} TABLET(${res[0][0]}) where id = 2 """ + order_qt_select7 """ SELECT * FROM ${table_name1} TABLET(${res[0][0]}) where id < 2 """ + order_qt_select8 """ SELECT * FROM ${table_name1} PARTITION less_than_20 TABLET(${res[0][0]}) where id < 2 """ + // result of order_qt_select9 should be empty + order_qt_select9 """ SELECT * FROM ${table_name1} PARTITION between_20_70 TABLET(${res[0][0]}) where id < 2""" + order_qt_select10 """ SELECT * FROM ${table_name1} PARTITION less_than_20 where id < 2""" + // result of order_qt_select11 should be empty + order_qt_select11 """ SELECT * FROM ${table_name1} PARTITION between_20_70 where id < 2""" + + res = sql """ show tablets from ${table_name1} where version = 1 """ + assertTrue(res.size() == 2) + assertEquals("1", res[0][4]) + assertEquals("1", res[1][4]) + // result should be empty because TABLET(${res[0][0]}) does not have data. + order_qt_select12 """ SELECT * FROM ${table_name1} TABLET(${res[0][0]}) """ + // result should be empty because TABLET(${res[1][0]}) does not have data. + order_qt_select13 """ SELECT * FROM ${table_name1} TABLET(${res[1][0]}) """ + + // Test non-partitioned table + def table_no_partition = "table_no_partition" + sql """ DROP TABLE IF EXISTS ${table_no_partition} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_no_partition} ( + `id` int(11) NULL, + `name` string NULL, + `age` int(11) NULL + ) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + + sql """ INSERT INTO ${table_no_partition} VALUES (1, 'doris', 19), (2, 'nereids', 18) """ + insert_res = sql "show last insert;" + + logger.info("insert result: " + insert_res.toString()) + order_qt_no_partition_1 """ SELECT * FROM ${table_no_partition} """ + + res = sql """ show tablets from ${table_no_partition} where version = 2 """ + + order_qt_no_partition_2 """ SELECT * FROM ${table_no_partition} TABLET(${res[0][0]}) """ + order_qt_no_partition_3 """ SELECT * FROM ${table_no_partition} TABLET(${res[1][0]}) """ + order_qt_no_partition_4 """ SELECT * FROM ${table_no_partition} TABLET(${res[2][0]}) """ + + order_qt_no_partition_5 """ SELECT * FROM ${table_no_partition} where id = 2 """ + order_qt_no_partition_6 """ SELECT * FROM ${table_no_partition} TABLET(${res[0][0]}) where id = 2 """ + order_qt_no_partition_7 """ SELECT * FROM ${table_no_partition} TABLET(${res[0][0]}) where id > 2 """ +} From 1ee2750c4c6b0c9abcd5f9b0f38edf6015ef2ce7 Mon Sep 17 00:00:00 2001 From: amory Date: Wed, 29 Nov 2023 18:23:17 +0800 Subject: [PATCH 21/50] [FIX](complextype)fix complex type hash equals (#27743) --- .../apache/doris/analysis/ArrayLiteral.java | 18 +++++ .../org/apache/doris/analysis/MapLiteral.java | 19 +++++ .../apache/doris/analysis/StructLiteral.java | 18 +++++ .../sql_functions/array_functions/sql/q04.out | 24 +++++++ .../sql_functions/array_functions/sql/q04.sql | 72 +++++++++++++++++++ 5 files changed, 151 insertions(+) create mode 100644 regression-test/data/query_p0/sql_functions/array_functions/sql/q04.out create mode 100644 regression-test/suites/query_p0/sql_functions/array_functions/sql/q04.sql diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java index ffd8f8f0ba9dbf..66145cb59608c6 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class ArrayLiteral extends LiteralExpr { @@ -127,6 +128,23 @@ protected void toThrift(TExprNode msg) { msg.setChildType(((ArrayType) type).getItemType().getPrimitiveType().toThrift()); } + @Override + public int hashCode() { + return Objects.hashCode(children); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof ArrayLiteral)) { + return false; + } + if (this == o) { + return true; + } + ArrayLiteral that = (ArrayLiteral) o; + return Objects.equals(children, that.children); + } + @Override public void write(DataOutput out) throws IOException { super.write(out); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java index 1130868e662c4b..62aead25e24a06 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MapLiteral.java @@ -33,6 +33,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; // INSERT INTO table_map VALUES ({'key1':1, 'key2':10, 'k3':100}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); @@ -206,4 +207,22 @@ public void write(DataOutput out) throws IOException { Expr.writeTo(e, out); } } + + @Override + public int hashCode() { + return Objects.hashCode(children); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof MapLiteral)) { + return false; + } + if (this == o) { + return true; + } + + MapLiteral that = (MapLiteral) o; + return Objects.equals(children, that.children); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java index 0198826eec6bbd..d4327fb979b364 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java @@ -32,6 +32,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class StructLiteral extends LiteralExpr { // only for persist @@ -164,4 +165,21 @@ public void checkValueValid() throws AnalysisException { e.checkValueValid(); } } + + public int hashCode() { + return Objects.hashCode(children); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof StructLiteral)) { + return false; + } + if (this == o) { + return true; + } + + StructLiteral that = (StructLiteral) o; + return Objects.equals(children, that.children); + } } diff --git a/regression-test/data/query_p0/sql_functions/array_functions/sql/q04.out b/regression-test/data/query_p0/sql_functions/array_functions/sql/q04.out new file mode 100644 index 00000000000000..e2e563600d25f2 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/array_functions/sql/q04.out @@ -0,0 +1,24 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !q04 -- +0 + +-- !q04_2 -- +0 + +-- !q04_3 -- +1 + +-- !q04_4 -- +1 + +-- !q04_5 -- +1 + +-- !q04_6 -- +1 + +-- !q04_7 -- +22,25,34 + +-- !q04_8 -- + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/sql/q04.sql b/regression-test/suites/query_p0/sql_functions/array_functions/sql/q04.sql new file mode 100644 index 00000000000000..0e26d95d0606a6 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/array_functions/sql/q04.sql @@ -0,0 +1,72 @@ +DROP TABLE IF EXISTS ads_pi_cuser_all_info; +CREATE TABLE IF NOT EXISTS ads_pi_cuser_all_info ( + `corp_id` varchar(64) NOT NULL COMMENT '机构ID', + `staff_id` varchar(64) NOT NULL COMMENT '客户经理ID', + `external_user_id` varchar(64) NOT NULL COMMENT '外部联系人ID', + `is_deleted` int(11) REPLACE_IF_NOT_NULL NULL COMMENT '删除好友标识', + `main_id` largeint(40) REPLACE_IF_NOT_NULL NULL COMMENT '用户main_id', + `birthday` varchar(32) REPLACE_IF_NOT_NULL NULL COMMENT '客户生日', + `gender` tinyint(4) REPLACE_IF_NOT_NULL NULL COMMENT '用户性别', + `avater` text REPLACE_IF_NOT_NULL NULL COMMENT '用户头像地址', + `name` text REPLACE_IF_NOT_NULL NULL COMMENT '用户姓名', + `remark_name` text REPLACE_IF_NOT_NULL NULL COMMENT '客户经理备注姓名', + `type` tinyint(4) REPLACE_IF_NOT_NULL NULL, + `client_number` varchar(64) REPLACE_IF_NOT_NULL NULL COMMENT '上一次行为码', + `M0000001` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000002` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000003` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000004` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000005` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000006` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000007` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000008` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000009` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000010` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000011` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000012` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000013` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000014` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000015` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000016` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000017` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000018` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000019` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `M0000020` int(11) SUM NOT NULL DEFAULT "0" COMMENT '预设用例分值,1000000取余', + `product_reach_times` int(11) SUM NOT NULL DEFAULT "0" COMMENT '本周产品触达次数,每自然周自动清零', + `common_reach_times` int(11) SUM NOT NULL DEFAULT "0" COMMENT '本周通用触达次数,每自然周自动清零', + `last_action` varchar(64) REPLACE_IF_NOT_NULL NULL COMMENT '上一次行为码', + `qw_tag_ids` text REPLACE_IF_NOT_NULL NULL COMMENT '企微标签,使用逗号分隔', + `stgy_tag_ids` text REPLACE_IF_NOT_NULL NULL COMMENT '企微标签,使用逗号分隔', + `last_reached_task_id` largeint(40) REPLACE_IF_NOT_NULL NULL COMMENT '上一次触达任务ID', + `last_reached_task_score` int(11) REPLACE_IF_NOT_NULL NULL COMMENT '上一次触达阶段分值', + `current_max_score_mode` text REPLACE_IF_NOT_NULL NULL COMMENT '分值最高的用例字段名,逗号分隔' +) ENGINE=OLAP +AGGREGATE KEY(`corp_id`, `staff_id`, `external_user_id`) +COMMENT 
'OLAP' +DISTRIBUTED BY HASH(`staff_id`) BUCKETS 3 +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1", +"is_being_synced" = "false", +"storage_format" = "V2", +"light_schema_change" = "true", +"disable_auto_compaction" = "false", +"enable_single_replica_compaction" = "false" +); + +INSERT INTO ads_pi_cuser_all_info +(corp_id, staff_id, external_user_id, is_deleted, main_id, birthday, gender, avater, name, remark_name, `type`, client_number, M0000001, M0000002, M0000003, M0000004, M0000005, M0000006, M0000007, M0000008, M0000009, M0000010, M0000011, M0000012, M0000013, M0000014, M0000015, M0000016, M0000017, M0000018, M0000019, M0000020, product_reach_times, common_reach_times, last_action, qw_tag_ids, stgy_tag_ids, last_reached_task_id, last_reached_task_score, current_max_score_mode) +VALUES('ww36b98e83f52f6bcc', '0af73fc236bf444aadc801cd4c416539', 'wmfvPXDAAA7OrmzTSkEVRkphuGx3hSVA', 0, '1604732822185627669', '', 2, 'http://wx.qlogo.cn/mmhead/9M0PhLTmTIeHGOibG2yxg90drr4nhu6NuJ5O4J9bskXicNShwsiaukk6g/0', 'mate20.0410q', 'mate20.0410q', 1, '', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, '45,34,22', NULL, '275', 0, NULL); +INSERT INTO ads_pi_cuser_all_info +(corp_id, staff_id, external_user_id, is_deleted, main_id, birthday, gender, avater, name, remark_name, `type`, client_number, M0000001, M0000002, M0000003, M0000004, M0000005, M0000006, M0000007, M0000008, M0000009, M0000010, M0000011, M0000012, M0000013, M0000014, M0000015, M0000016, M0000017, M0000018, M0000019, M0000020, product_reach_times, common_reach_times, last_action, qw_tag_ids, stgy_tag_ids, last_reached_task_id, last_reached_task_score, current_max_score_mode) +VALUES('ww36b98e83f52f6bcc', '0af73fc236bf444aadc801cd4c416539', 'wmfvPXDAAAHCcW-cFR5U2yPG5zfAS4rg', 0, '1600777478614724671', '', 0, 'http://wx.qlogo.cn/mmhead/Q3auHgzwzM43qyI9vM4Q8jYrdl7ia8FakbibeTWnSmTVu7QjtxHLJib2g/0', 'iPhoneXS', 'iPhoneXS', 1, '', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, '56,34,35', NULL, NULL, NULL); +INSERT INTO ads_pi_cuser_all_info +(corp_id, staff_id, external_user_id, is_deleted, main_id, birthday, gender, avater, name, remark_name, `type`, client_number, M0000001, M0000002, M0000003, M0000004, M0000005, M0000006, M0000007, M0000008, M0000009, M0000010, M0000011, M0000012, M0000013, M0000014, M0000015, M0000016, M0000017, M0000018, M0000019, M0000020, product_reach_times, common_reach_times, last_action, qw_tag_ids, stgy_tag_ids, last_reached_task_id, last_reached_task_score, current_max_score_mode) +VALUES('ww36b98e83f52f6bcc1', '0af73fc236bf444aadc801cd4c416539', 'wmfvPXDAAA7OrmzTSkEVRkphuGx3hSVA', 0, '1604732822185627669', '', 2, 'http://wx.qlogo.cn/mmhead/9M0PhLTmTIeHGOibG2yxg90drr4nhu6NuJ5O4J9bskXicNShwsiaukk6g/0', 'mate20.0410q', 'mate20.0410q', 1, '', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, '45,34', '275', 0, NULL); +INSERT INTO ads_pi_cuser_all_info +(corp_id, staff_id, external_user_id, is_deleted, main_id, birthday, gender, avater, name, remark_name, `type`, client_number, M0000001, M0000002, M0000003, M0000004, M0000005, M0000006, M0000007, M0000008, M0000009, M0000010, M0000011, M0000012, M0000013, M0000014, M0000015, M0000016, M0000017, M0000018, M0000019, M0000020, product_reach_times, common_reach_times, last_action, qw_tag_ids, stgy_tag_ids, last_reached_task_id, last_reached_task_score, current_max_score_mode) +VALUES('ww36b98e83f52f6bcc2', '0af73fc236bf444aadc801cd4c416539', 
'wmfvPXDAAAHCcW-cFR5U2yPG5zfAS4rg', 0, '1600777478614724671', '', 0, 'http://wx.qlogo.cn/mmhead/Q3auHgzwzM43qyI9vM4Q8jYrdl7ia8FakbibeTWnSmTVu7QjtxHLJib2g/0', 'iPhoneXS', 'iPhoneXS', 1, '', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, '22,25,34', NULL, NULL, NULL); + + +select stgy_tag_ids from ads_pi_cuser_all_info WHERE arrays_overlap(split_by_string(stgy_tag_ids,','),['23','22']); + +select * from ads_pi_cuser_all_info WHERE arrays_overlap(split_by_string(stgy_tag_ids,','),['23','22']) and arrays_overlap(split_by_string(stgy_tag_ids,','),['35']); \ No newline at end of file From b5eb1f0aed881e00d829546aed24e5678253d268 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 29 Nov 2023 19:35:22 +0800 Subject: [PATCH 22/50] [fix](statistics) Fix show auto analyze missing jobs bug (#27761) --- .../apache/doris/statistics/AnalysisManager.java | 2 +- .../doris/statistics/AnalysisManagerTest.java | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 159461f463ed4c..88890c21d6e061 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -1051,7 +1051,7 @@ protected SimpleQueue createSimpleQueue(Collection c a -> { // DO NOTHING return null; - }, null); + }, collection); } // Remove col stats status from TableStats if failed load some col stats after analyze corresponding column so that diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index c210a60262dfab..20672730e51543 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -29,6 +29,7 @@ import org.apache.doris.statistics.AnalysisInfo.AnalysisType; import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.AnalysisInfo.ScheduleType; +import org.apache.doris.statistics.util.SimpleQueue; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.annotations.VisibleForTesting; @@ -394,4 +395,16 @@ public void testRecordLimit3() { Assertions.assertEquals(2, analysisManager.autoJobs.size()); } + @Test + public void testCreateSimpleQueue() { + AnalysisManager analysisManager = new AnalysisManager(); + ArrayList jobs = Lists.newArrayList(); + jobs.add(new AnalysisInfoBuilder().setJobId(1).build()); + jobs.add(new AnalysisInfoBuilder().setJobId(2).build()); + SimpleQueue simpleQueue = analysisManager.createSimpleQueue(jobs, analysisManager); + Assertions.assertEquals(2, simpleQueue.size()); + simpleQueue = analysisManager.createSimpleQueue(null, analysisManager); + Assertions.assertEquals(0, simpleQueue.size()); + } + } From 86d179c82074f87a38f5377ec7ce63199681bf8e Mon Sep 17 00:00:00 2001 From: Kang Date: Wed, 29 Nov 2023 19:43:03 +0800 Subject: [PATCH 23/50] [bugfix](topn) fix coredump in copy_column_data_to_block when nullable mismatch return RuntimeError if copy_column_data_to_block nullable mismatch to avoid coredump in input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, raw_res_ptr) . 
The problem was reported by a Doris user, but I cannot reproduce it, so no test case has been added yet.
---
 be/src/vec/core/block.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
index ebb0f6e23d93cb..939597d50b6550 100644
--- a/be/src/vec/core/block.h
+++ b/be/src/vec/core/block.h
@@ -154,6 +154,12 @@ class Block {
                     reinterpret_cast(raw_res_ptr.get());
             col_ptr_nullable->get_null_map_column().insert_many_defaults(select_size);
             raw_res_ptr = col_ptr_nullable->get_nested_column_ptr();
+        } else if (!raw_res_ptr->is_nullable() && input_col_ptr->is_nullable()) {
+            LOG(WARNING) << "nullable mismatch for raw_res_column: "
+                         << this->get_by_position(block_cid).dump_structure()
+                         << " input_column: " << input_col_ptr->dump_structure()
+                         << " block_cid: " << block_cid << " select_size: " << select_size;
+            return Status::RuntimeError("copy_column_data_to_block nullable mismatch");
         }

         return input_col_ptr->filter_by_selector(sel_rowid_idx, select_size, raw_res_ptr);

From 46c1b7a5260275342b6bcd7b1ea58f623f8fc158 Mon Sep 17 00:00:00 2001
From: AKIRA <33112463+Kikyou1997@users.noreply.github.com>
Date: Wed, 29 Nov 2023 20:59:02 +0800
Subject: [PATCH 24/50] [opt](stats) Use escape rather than base64 for min/max value #27746 (#27748)

---
 .../doris/statistics/BaseAnalysisTask.java | 6 +++---
 .../apache/doris/statistics/ColStatsData.java | 16 +++-------------
 .../apache/doris/statistics/ColumnStatistic.java | 4 ----
 .../doris/statistics/OlapAnalysisTask.java | 4 ++--
 .../doris/statistics/StatisticsRepository.java | 4 ++--
 .../doris/statistics/util/StatisticsUtil.java | 4 +++-
 .../doris/statistics/BaseAnalysisTaskTest.java | 4 ++--
 .../doris/statistics/OlapAnalysisTaskTest.java | 6 +++---
 .../apache/doris/statistics/StatsMockUtil.java | 2 +-
 .../statistics/util/StatisticsUtilTest.java | 8 ++++++++
 10 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
index 824e3f74abd84a..4c0e07ce6cb6e7 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java
@@ -252,7 +252,7 @@ protected String getDataSizeFunction(Column column, boolean useDuj1) {

     protected String getMinFunction() {
         if (tableSample == null) {
-            return "to_base64(CAST(MIN(`${colName}`) as ${type})) ";
+            return "CAST(MIN(`${colName}`) as ${type}) ";
         } else {
             // Min value is not accurate while sample, so set it to NULL to avoid optimizer generate bad plan.
             return "NULL";
         }
@@ -276,7 +276,7 @@ protected String getNdvFunction(String totalRows) {
     // Max value is not accurate while sample, so set it to NULL to avoid optimizer generate bad plan. 
protected String getMaxFunction() { if (tableSample == null) { - return "to_base64(CAST(MAX(`${colName}`) as ${type})) "; + return "CAST(MAX(`${colName}`) as ${type}) "; } else { return "NULL"; } @@ -315,7 +315,7 @@ protected void runQuery(String sql, boolean needEncode) { long startTime = System.currentTimeMillis(); try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) { stmtExecutor = new StmtExecutor(a.connectContext, sql); - ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0), needEncode); + ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0)); job.appendBuf(this, Collections.singletonList(colStatsData)); } finally { LOG.debug("End cost time in secs: " + (System.currentTimeMillis() - startTime) / 1000); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java index fa7546abd5b412..7878a06548818f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColStatsData.java @@ -21,8 +21,6 @@ import com.google.common.annotations.VisibleForTesting; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.StringJoiner; /** @@ -56,8 +54,6 @@ public class ColStatsData { public final String updateTime; - public final boolean needEncode; - @VisibleForTesting public ColStatsData() { statsId = new StatsId(); @@ -68,10 +64,9 @@ public ColStatsData() { maxLit = null; dataSizeInBytes = 0; updateTime = null; - needEncode = true; } - public ColStatsData(ResultRow row, boolean needEncode) { + public ColStatsData(ResultRow row) { this.statsId = new StatsId(row); this.count = (long) Double.parseDouble(row.get(7)); this.ndv = (long) Double.parseDouble(row.getWithDefault(8, "0")); @@ -80,7 +75,6 @@ public ColStatsData(ResultRow row, boolean needEncode) { this.maxLit = row.get(11); this.dataSizeInBytes = (long) Double.parseDouble(row.getWithDefault(12, "0")); this.updateTime = row.get(13); - this.needEncode = needEncode; } public String toSQL(boolean roundByParentheses) { @@ -93,12 +87,8 @@ public String toSQL(boolean roundByParentheses) { sj.add(String.valueOf(count)); sj.add(String.valueOf(ndv)); sj.add(String.valueOf(nullCount)); - sj.add(minLit == null ? "NULL" : needEncode - ? "'" + Base64.getEncoder().encodeToString(minLit.getBytes(StandardCharsets.UTF_8)) + "'" - : "'" + minLit + "'"); - sj.add(maxLit == null ? "NULL" : needEncode - ? "'" + Base64.getEncoder().encodeToString(maxLit.getBytes(StandardCharsets.UTF_8)) + "'" - : "'" + maxLit + "'"); + sj.add(minLit == null ? "NULL" : "'" + StatisticsUtil.escapeSQL(minLit) + "'"); + sj.add(maxLit == null ? 
"NULL" : "'" + StatisticsUtil.escapeSQL(maxLit) + "'"); sj.add(String.valueOf(dataSizeInBytes)); sj.add(StatisticsUtil.quote(updateTime)); return sj.toString(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index eefaf7badeb0e6..5ea1f9097bb052 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -31,8 +31,6 @@ import org.apache.logging.log4j.Logger; import org.json.JSONObject; -import java.nio.charset.StandardCharsets; -import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -175,7 +173,6 @@ public static ColumnStatistic fromResultRow(ResultRow row) { String min = row.get(10); String max = row.get(11); if (min != null && !min.equalsIgnoreCase("NULL")) { - min = new String(Base64.getDecoder().decode(min), StandardCharsets.UTF_8); // Internal catalog get the min/max value using a separate SQL, // and the value is already encoded by base64. Need to handle internal and external catalog separately. if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && min.equalsIgnoreCase("NULL")) { @@ -193,7 +190,6 @@ public static ColumnStatistic fromResultRow(ResultRow row) { columnStatisticBuilder.setMinValue(Double.NEGATIVE_INFINITY); } if (max != null && !max.equalsIgnoreCase("NULL")) { - max = new String(Base64.getDecoder().decode(max), StandardCharsets.UTF_8); if (catalogId != InternalCatalog.INTERNAL_CATALOG_ID && max.equalsIgnoreCase("NULL")) { columnStatisticBuilder.setMaxValue(Double.POSITIVE_INFINITY); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 04a763b688440a..45ccdbb0d684d3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -92,8 +92,8 @@ protected void doSample() throws Exception { // Get basic stats, including min and max. 
ResultRow basicStats = collectBasicStat(r); long rowCount = tbl.getRowCount(); - String min = StatisticsUtil.encodeValue(basicStats, 0); - String max = StatisticsUtil.encodeValue(basicStats, 1); + String min = StatisticsUtil.escapeSQL(basicStats.get(0)); + String max = StatisticsUtil.escapeSQL(basicStats.get(1)); boolean limitFlag = false; long rowsToSample = pair.second; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java index 12ca6b4aa1f938..29e11ac75ad724 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsRepository.java @@ -269,8 +269,8 @@ public static void alterColumnStatistics(AlterColumnStatsStmt alterColumnStatsSt params.put("count", String.valueOf(columnStatistic.count)); params.put("ndv", String.valueOf(columnStatistic.ndv)); params.put("nullCount", String.valueOf(columnStatistic.numNulls)); - params.put("min", StatisticsUtil.encodeString(min)); - params.put("max", StatisticsUtil.encodeString(max)); + params.put("min", StatisticsUtil.escapeSQL(min)); + params.put("max", StatisticsUtil.escapeSQL(max)); params.put("dataSize", String.valueOf(columnStatistic.dataSize)); if (partitionIds.isEmpty()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 66b13ff87f1be8..3f9abcad5f98d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -786,7 +786,9 @@ public static String escapeSQL(String str) { if (str == null) { return null; } - return org.apache.commons.lang3.StringUtils.replace(str, "'", "''"); + return str.replace("'", "''") + .replace("\\", "\\\\") + .replace("\"", "\"\""); } public static boolean isExternalTable(String catalogName, String dbName, String tblName) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java index fe81c055e0dfbf..187c4d207dfcad 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/BaseAnalysisTaskTest.java @@ -42,14 +42,14 @@ public void testGetFunctions() { Assertions.assertEquals("SUM(t1.count) * 4", dataSizeFunction); String minFunction = olapAnalysisTask.getMinFunction(); - Assertions.assertEquals("to_base64(CAST(MIN(`${colName}`) as ${type})) ", minFunction); + Assertions.assertEquals("CAST(MIN(`${colName}`) as ${type}) ", minFunction); olapAnalysisTask.tableSample = new TableSample(true, 20L); minFunction = olapAnalysisTask.getMinFunction(); Assertions.assertEquals("NULL", minFunction); olapAnalysisTask.tableSample = null; String maxFunction = olapAnalysisTask.getMaxFunction(); - Assertions.assertEquals("to_base64(CAST(MAX(`${colName}`) as ${type})) ", maxFunction); + Assertions.assertEquals("CAST(MAX(`${colName}`) as ${type}) ", maxFunction); olapAnalysisTask.tableSample = new TableSample(true, 20L); maxFunction = olapAnalysisTask.getMaxFunction(); Assertions.assertEquals("NULL", maxFunction); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index c174795b36bf20..7b7894e54b44e1 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -151,7 +151,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql, boolean needEncode) { Assertions.assertFalse(needEncode); - Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; @@ -218,7 +218,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql, boolean needEncode) { Assertions.assertFalse(needEncode); - Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM `catalogName`.`${dbName}`.`${tblName}` limit 100", sql); + Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM `catalogName`.`${dbName}`.`${tblName}` limit 100", sql); return; } }; @@ -292,7 +292,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { @Mock public void runQuery(String sql, boolean needEncode) { Assertions.assertFalse(needEncode); - Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS 
`part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, 'MQ==' AS `min`, 'Mg==' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); + Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } }; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java index 84e1112d216cdb..45c78f665b0dc9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsMockUtil.java @@ -40,7 +40,7 @@ public static ResultRow mockResultRow(boolean col) { add("0"); add("10"); // 11 - add("MTE="); + add("11"); add("12"); add(String.valueOf(System.currentTimeMillis())); }}; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 2c0854dcf2125b..c827a7d1690900 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -142,4 +142,12 @@ public void testEncodeValue() throws Exception { .encodeToString("a".getBytes(StandardCharsets.UTF_8)), StatisticsUtil.encodeValue(row, 1)); Assertions.assertEquals("NULL", StatisticsUtil.encodeValue(row, 2)); } + + @Test + public void testEscape() { + // \'" + String origin = "\\'\""; + // \\''"" + Assertions.assertEquals("\\\\''\"\"", StatisticsUtil.escapeSQL(origin)); + } } From 4f15abd353fa22378b6c80d314491a12674d06c3 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Wed, 29 Nov 2023 21:00:46 +0800 Subject: [PATCH 25/50] [refactor](http) disable snapshot and get_log_file api (#27724) (#27770) --- be/src/common/config.cpp | 1 + be/src/common/config.h | 2 ++ be/src/http/action/snapshot_action.cpp | 4 ++++ .../src/main/java/org/apache/doris/common/Config.java | 4 ++++ .../java/org/apache/doris/httpv2/rest/GetLogFileAction.java | 3 +++ 5 files changed, 14 insertions(+) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 858c94b02ba8da..b87c19a179b4b3 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1113,6 +1113,7 @@ DEFINE_mInt32(variant_max_merged_tablet_schema_size, "2048"); // then the new created tablet will not locate in the high use disk. 
// range: 0 ~ 100 DEFINE_mInt32(disk_diff_usage_percentage_for_create_tablet, "20"); +DEFINE_Bool(enable_snapshot_action, "false"); // clang-format off #ifdef BE_TEST diff --git a/be/src/common/config.h b/be/src/common/config.h index 6a5001bac89224..52a2eeee2e34e6 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1174,6 +1174,8 @@ DECLARE_mInt32(variant_max_merged_tablet_schema_size); // then the new created tablet will not locate in the high use disk. // range: 0 ~ 100 DECLARE_mInt32(disk_diff_usage_percentage_for_create_tablet); +// whether to enable /api/snapshot api +DECLARE_Bool(enable_snapshot_action); #ifdef BE_TEST // test s3 diff --git a/be/src/http/action/snapshot_action.cpp b/be/src/http/action/snapshot_action.cpp index c705d3c9bac74b..c93220ac0f8cb4 100644 --- a/be/src/http/action/snapshot_action.cpp +++ b/be/src/http/action/snapshot_action.cpp @@ -41,6 +41,10 @@ SnapshotAction::SnapshotAction(ExecEnv* exec_env, TPrivilegeHier::type hier, : HttpHandlerWithAuth(exec_env, hier, type) {} void SnapshotAction::handle(HttpRequest* req) { + if (!config::enable_snapshot_action) { + HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, "feature disabled"); + return; + } LOG(INFO) << "accept one request " << req->debug_string(); // Get tablet id const std::string& tablet_id_str = req->param(TABLET_ID); diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 92fa8130757be3..d02ea3a8e07ea1 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2227,4 +2227,8 @@ public class Config extends ConfigBase { "the max package size fe thrift server can receive,avoid accepting error" + "or too large package causing OOM,default 20000000(20M),set -1 for unlimited. 
"}) public static int fe_thrift_max_pkg_bytes = 20000000; + + @ConfField(description = {"是否开启通过http接口获取log文件的功能", + "Whether to enable the function of getting log files through http interface"}) + public static boolean enable_get_log_file_api = false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/GetLogFileAction.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/GetLogFileAction.java index c96e19971b155b..475ee5ace1ea0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/GetLogFileAction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/GetLogFileAction.java @@ -55,6 +55,9 @@ public class GetLogFileAction extends RestBaseController { @RequestMapping(path = "/api/get_log_file", method = {RequestMethod.GET, RequestMethod.HEAD}) public Object execute(HttpServletRequest request, HttpServletResponse response) { + if (!Config.enable_get_log_file_api) { + return ResponseEntityBuilder.badRequest("feature disabled"); + } executeCheckPassword(request, response); checkGlobalAuth(ConnectContext.get().getCurrentUserIdentity(), PrivPredicate.ADMIN); From 2643671aa8929f7786b459e90a732f22b8458710 Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Thu, 30 Nov 2023 08:40:09 +0800 Subject: [PATCH 26/50] [branch-2.0](pick 27738) Warning log to trace send fragment #27738 (#27760) --- be/src/service/internal_service.cpp | 28 +++++++++++++++++-- .../java/org/apache/doris/qe/Coordinator.java | 27 ++++++++++++++---- 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index 12becc4b7296a3..597c8fdfef43bb 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -461,10 +461,22 @@ Status PInternalServiceImpl::_exec_plan_fragment_impl(const std::string& ser_req uint32_t len = ser_request.size(); RETURN_IF_ERROR(deserialize_thrift_msg(buf, &len, compact, &t_request)); } + const auto& fragment_list = t_request.paramsList; + MonotonicStopWatch timer; + timer.start(); for (const TExecPlanFragmentParams& params : t_request.paramsList) { RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment(params)); } + + timer.stop(); + double cost_secs = static_cast(timer.elapsed_time()) / 1000000000ULL; + if (cost_secs > 5) { + LOG_WARNING("Prepare {} fragments of query {} costs {} seconds, it costs too much", + fragment_list.size(), print_id(fragment_list.front().params.query_id), + cost_secs); + } + return Status::OK(); } else if (version == PFragmentRequestVersion::VERSION_3) { TPipelineFragmentParamsList t_request; @@ -474,9 +486,21 @@ Status PInternalServiceImpl::_exec_plan_fragment_impl(const std::string& ser_req RETURN_IF_ERROR(deserialize_thrift_msg(buf, &len, compact, &t_request)); } - for (const TPipelineFragmentParams& params : t_request.params_list) { - RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment(params)); + const auto& fragment_list = t_request.params_list; + MonotonicStopWatch timer; + timer.start(); + + for (const TPipelineFragmentParams& fragment : fragment_list) { + RETURN_IF_ERROR(_exec_env->fragment_mgr()->exec_plan_fragment(fragment)); + } + + timer.stop(); + double cost_secs = static_cast(timer.elapsed_time()) / 1000000000ULL; + if (cost_secs > 5) { + LOG_WARNING("Prepare {} fragments of query {} costs {} seconds, it costs too much", + fragment_list.size(), print_id(fragment_list.front().query_id), cost_secs); } + return Status::OK(); } else { return Status::InternalError("invalid version"); diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index af8eb518e9d319..cf3175bc72dcf6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -875,8 +875,13 @@ private void waitRpc(List>> futures, long leftTimeMs, String operation) throws RpcException, UserException { if (leftTimeMs <= 0) { - throw new UserException("timeout before waiting for " + operation + " RPC. Elapse(sec): " + ( - (System.currentTimeMillis() - timeoutDeadline) / 1000 + queryOptions.query_timeout)); + long elapsed = (System.currentTimeMillis() - timeoutDeadline) / 1000 + queryOptions.getExecutionTimeout(); + String msg = String.format( + "timeout before waiting {} rpc, query timeout: {}, already elapsed:{}, left for this:{}", + operation, queryOptions.getExecutionTimeout(), elapsed, leftTimeMs); + LOG.warn("Query {} {}", DebugUtil.printId(queryId), msg); + throw new UserException(msg); } long timeoutMs = Math.min(leftTimeMs, Config.remote_fragment_exec_timeout_ms); @@ -971,7 +983,10 @@ private void waitPipelineRpc(List Date: Thu, 30 Nov 2023 09:29:37 +0800 Subject: [PATCH 27/50] [branch-2.0](pick #27771) Add more detail msg for waitRPC exception (#27773) From 07ad06d983a19d5dfb86f41d0164e1a1b26e5734 Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 30 Nov 2023 13:19:50 +0800 Subject: [PATCH 28/50] [Bug](pipeline) prevent PipelineFragmentContext destruct early (#27790) --- be/src/pipeline/task_scheduler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/be/src/pipeline/task_scheduler.cpp b/be/src/pipeline/task_scheduler.cpp index 00b27fc59bf437..dc9dd17826f8bb 100644 --- a/be/src/pipeline/task_scheduler.cpp +++ b/be/src/pipeline/task_scheduler.cpp @@ -319,6 +319,11 @@ void TaskScheduler::_do_work(size_t index) { } void TaskScheduler::_try_close_task(PipelineTask* task, PipelineTaskState state) { + // close_a_pipeline may delete fragment context and will core in some defer + // code, because the defer code will access fragment context it self. + std::shared_ptr lock_for_context = + task->fragment_context()->shared_from_this(); + if (task->is_pending_finish()) { task->set_state(PipelineTaskState::PENDING_FINISH); _blocked_task_scheduler->add_blocked_task(task); From 77f30522b775ab84dcdc7e6049b2a1cc557cd60b Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 30 Nov 2023 13:56:00 +0800 Subject: [PATCH 29/50] [deps](compression) Opt gzip decompress by libdeflate on X86 and X86_64 platforms: 1. Add libdeflate lib. (#27542) (#27711) Backport from #27542. --- thirdparty/build-thirdparty.sh | 15 +++++++++++++++ thirdparty/vars.sh | 7 +++++++ 2 files changed, 22 insertions(+) diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh index 2a911ed27bbd82..7cb2b0b12c999f 100755 --- a/thirdparty/build-thirdparty.sh +++ b/thirdparty/build-thirdparty.sh @@ -1633,6 +1633,20 @@ build_dragonbox() { "${BUILD_SYSTEM}" install } +# libdeflate +build_libdeflate() { + check_if_source_exist "${LIBDEFLATE_SOURCE}" + cd "${TP_SOURCE_DIR}/${LIBDEFLATE_SOURCE}" + + rm -rf "${BUILD_DIR}" + mkdir -p "${BUILD_DIR}" + cd "${BUILD_DIR}" + + "${CMAKE_CMD}" -G "${GENERATOR}" -DCMAKE_INSTALL_PREFIX="${TP_INSTALL_DIR}" -DCMAKE_BUILD_TYPE=Release .. 
+ "${BUILD_SYSTEM}" -j "${PARALLEL}" + "${BUILD_SYSTEM}" install +} + if [[ "${#packages[@]}" -eq 0 ]]; then packages=( libunixodbc @@ -1694,6 +1708,7 @@ if [[ "${#packages[@]}" -eq 0 ]]; then fast_float libunwind dragonbox + libdeflate ) if [[ "$(uname -s)" == 'Darwin' ]]; then read -r -a packages <<<"binutils gettext ${packages[*]}" diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh index e9e891059e85ca..33d956ac484092 100644 --- a/thirdparty/vars.sh +++ b/thirdparty/vars.sh @@ -470,6 +470,12 @@ DRAGONBOX_NAME=dragonbox-1.1.3.tar.gz DRAGONBOX_SOURCE=dragonbox-1.1.3 DRAGONBOX_MD5SUM="889dc00db9612c6949a4ccf8115e0e6a" +# libdeflate +LIBDEFLATE_DOWNLOAD="https://github.com/ebiggers/libdeflate/archive/refs/tags/v1.19.tar.gz" +LIBDEFLATE_NAME=libdeflate-1.19.tar.gz +LIBDEFLATE_SOURCE=libdeflate-1.19 +LIBDEFLATE_MD5SUM="c69e9193d2975a729068ffa862c81fb6" + # all thirdparties which need to be downloaded is set in array TP_ARCHIVES export TP_ARCHIVES=( 'LIBEVENT' @@ -539,6 +545,7 @@ export TP_ARCHIVES=( 'FAST_FLOAT' 'HADOOP_LIBS' 'DRAGONBOX' + 'LIBDEFLATE' ) if [[ "$(uname -s)" == 'Darwin' ]]; then From 5d98711ce53570388becc535de184112efeb8024 Mon Sep 17 00:00:00 2001 From: amory Date: Thu, 30 Nov 2023 18:47:50 +0800 Subject: [PATCH 30/50] [FIX](case)fix case truncate table first #27792 --- ...ed_types_insert_into_with_dup_table.groovy | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/regression-test/suites/datatype_p0/nested_types/negative_cases/test_nested_types_insert_into_with_dup_table.groovy b/regression-test/suites/datatype_p0/nested_types/negative_cases/test_nested_types_insert_into_with_dup_table.groovy index f6ce940868db5f..f58e41078bf153 100644 --- a/regression-test/suites/datatype_p0/nested_types/negative_cases/test_nested_types_insert_into_with_dup_table.groovy +++ b/regression-test/suites/datatype_p0/nested_types/negative_cases/test_nested_types_insert_into_with_dup_table.groovy @@ -20,7 +20,27 @@ import org.codehaus.groovy.runtime.IOGroovyMethods suite("test_nested_types_insert_into_with_dup_table", "p0") { sql 'use regression_test_datatype_p0_nested_types' sql 'set enable_nereids_planner=false' - + + sql """ + truncate table `tbl_array_nested_types_dup`; + """ + + sql """ + truncate table `tbl_array_nested_types_dup2`; + """ + + sql """ + truncate table `tbl_map_types_dup`; + """ + + sql """ + truncate table `tbl_array_map_types_dup`; + """ + + sql """ + truncate table `tbl_map_array_types_dup`; + """ + // test action for scala to array with scala type // current we support char family to insert nested type test { From 3026226732d0cf67e2620843f1c0bad0c62cc38e Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Thu, 30 Nov 2023 19:06:55 +0800 Subject: [PATCH 31/50] [doc](stats) add auto_analyze_table_width_threshold description. (#27818) (#27832) --- docs/en/docs/query-acceleration/statistics.md | 35 +++++++++---------- .../docs/query-acceleration/statistics.md | 1 + 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index fdc1661faf6ac8..c7a58277580a00 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -283,23 +283,23 @@ mysql> KILL ANALYZE 52357;
-```markdown ## 3. Session Variables and Configuration Options --- ### 3.1 Session Variables -| Session Variable | Description | Default Value | -| ----------------------------- | -------------------------------------------- | ------------- | -| auto_analyze_start_time | Start time for automatic statistics collection | 00:00:00 | -| auto_analyze_end_time | End time for automatic statistics collection | 23:59:59 | -| enable_auto_analyze | Enable automatic collection functionality | true | -| huge_table_default_sample_rows | Sampling rows for large tables | 4194304 | -| huge_table_lower_bound_size_in_bytes | Tables with size greater than this value will be automatically sampled during collection of statistics | 5368709120 | -| huge_table_auto_analyze_interval_in_millis | Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within this time interval. | 43200000 | -| table_stats_health_threshold | Ranges from 0 to 100. If data updates since the last statistics collection exceed `(100 - table_stats_health_threshold)%`, the table's statistics are considered outdated. | 60 | -| analyze_timeout | Controls the timeout for synchronous ANALYZE in seconds | 43200 | +|Session Variable|Description|Default Value| +|---|---|---| +|auto_analyze_start_time|Start time for automatic statistics collection|00:00:00| +|auto_analyze_end_time|End time for automatic statistics collection|23:59:59| +|enable_auto_analyze|Enable automatic collection functionality|true| +|huge_table_default_sample_rows|Sampling rows for large tables|4194304| +|huge_table_lower_bound_size_in_bytes|Tables with size greater than this value will be automatically sampled during collection of statistics|5368709120| +|huge_table_auto_analyze_interval_in_millis|Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within this time interval.|43200000| +|table_stats_health_threshold|Ranges from 0 to 100. If data updates since the last statistics collection exceed `(100 - table_stats_health_threshold)%`, the table's statistics are considered outdated.|60| +|analyze_timeout|Controls the timeout for synchronous ANALYZE in seconds|43200| +|auto_analyze_table_width_threshold|Controls the maximum width of table that will be auto analyzed. Table with more columns than this value will not be auto analyzed.|70|
@@ -307,15 +307,14 @@ mysql> KILL ANALYZE 52357; The following FE configuration options are typically not a major concern: -| FE Configuration Option | Description | Default Value | -| ---------------------------------- | ---------------------------------------- | ------------- | -| analyze_record_limit | Controls the persistence of statistics job execution records | 20000 | -| stats_cache_size | FE-side statistics cache entries | 500,000 | -| statistics_simultaneously_running_task_num | Number of asynchronous jobs that can run simultaneously | 3 | -| statistics_sql_mem_limit_in_bytes | Controls the amount of BE memory each statistics SQL can use | 2,147,483,648 bytes (2 GiB) | +|FE Configuration Option|Description|Default Value| +|---|---|---| +|analyze_record_limit|Controls the persistence of statistics job execution records|20000| +|stats_cache_size|FE-side statistics cache entries|500,000| +|statistics_simultaneously_running_task_num|Number of asynchronous jobs that can run simultaneously|3| +|statistics_sql_mem_limit_in_bytes|Controls the amount of BE memory each statistics SQL can use|2,147,483,648 bytes (2 GiB)|
-``` ## 4. Common Issues diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index def289dad12100..20b535e357b8ad 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -303,6 +303,7 @@ mysql> KILL ANALYZE 52357; |huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes * 5的表仅ANALYZE一次|43200000| |table_stats_health_threshold|取值在0-100之间,当自上次统计信息收集操作之后,数据更新量达到 (100 - table_stats_health_threshold)% ,认为该表的统计信息已过时|60| |analyze_timeout|控制ANALYZE超时时间,单位为秒|43200| +|auto_analyze_table_width_threshold|控制自动统计信息收集处理的最大表宽度,列数大于该值的表不会参与自动统计信息收集|70|


From 5e9404a18a431ede706dfdd5799b20964dcfd82b Mon Sep 17 00:00:00 2001
From: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com>
Date: Thu, 30 Nov 2023 19:40:02 +0800
Subject: [PATCH 32/50] [fix](bdbje) Fix bdbje logging level not work (#27597)
 (#27788)

* `EnvironmentConfig.FILE_LOGGING_LEVEL` only sets the FileHandler level; we
  should set the logger level first, otherwise the setting will not take
  effect.
---
 fe/fe-common/src/main/java/org/apache/doris/common/Config.java | 2 +-
 .../java/org/apache/doris/journal/bdbje/BDBEnvironment.java | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index d02ea3a8e07ea1..0594c80830776a 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -1963,7 +1963,7 @@ public class Config extends ConfigBase {
      * OFF, SEVERE, WARNING, INFO, CONFIG, FINE, FINER, FINEST, ALL
      */
     @ConfField
-    public static String bdbje_file_logging_level = "ALL";
+    public static String bdbje_file_logging_level = "INFO";

     /**
      * When holding lock time exceeds the threshold, need to report it.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
index 5dec4a4c5704f8..650cdf98ca79c2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java
@@ -59,6 +59,7 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.logging.Level;
 import java.util.stream.Collectors;

 /* this class contains the reference to bdb environment.
@@ -139,6 +140,8 @@ public void setup(File envHome, String selfNodeName, String selfNodeHostPort,
                 String.valueOf(Config.bdbje_reserved_disk_bytes));

         if (BDBJE_LOG_LEVEL.contains(Config.bdbje_file_logging_level)) {
+            java.util.logging.Logger parent = java.util.logging.Logger.getLogger("com.sleepycat.je");
+            parent.setLevel(Level.parse(Config.bdbje_file_logging_level));
             environmentConfig.setConfigParam(EnvironmentConfig.FILE_LOGGING_LEVEL, Config.bdbje_file_logging_level);
         } else {
             LOG.warn("bdbje_file_logging_level invalid value: {}, will not take effort, use default",

From 5da3ba03846db31738a48a72fdc4c39709d64ecf Mon Sep 17 00:00:00 2001
From: Qi Chen <kaka11.chen@gmail.com>
Date: Thu, 30 Nov 2023 22:09:19 +0800
Subject: [PATCH 33/50] [Opt](compression) Opt gzip decompress by libdeflate on
 X86 and X86_64 platforms: 2. Opt gzip decompression by libdeflate lib.
 (#27669) (#27801)

Backport from #27669. 
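For reference, this is the one-shot libdeflate call pattern the patch adopts,
shown as a minimal standalone sketch (assuming the libdeflate 1.19 headers
added in part 1 of this change; the buffer handling here is illustrative, not
the BE's actual wrapper):

```cpp
// Sketch: one-shot gzip decompression with libdeflate.
// The caller must know (or over-allocate) the output size, because
// libdeflate_gzip_decompress() is a whole-buffer, non-streaming API.
#include <libdeflate.h>
#include <vector>

bool gzip_decompress(const std::vector<char>& in, std::vector<char>& out) {
    libdeflate_decompressor* d = libdeflate_alloc_decompressor();
    if (d == nullptr) {
        return false; // decompressor allocation failed
    }
    size_t actual_out = 0;
    libdeflate_result res = libdeflate_gzip_decompress(
            d, in.data(), in.size(), out.data(), out.size(), &actual_out);
    libdeflate_free_decompressor(d);
    if (res != LIBDEFLATE_SUCCESS) {
        // e.g. LIBDEFLATE_BAD_DATA or LIBDEFLATE_INSUFFICIENT_SPACE
        return false;
    }
    out.resize(actual_out); // shrink to the bytes actually produced
    return true;
}
```

The diff below goes one step further and caches the decompressor in a
thread_local handle, so hot read paths do not pay the alloc/free cost on
every call.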
--- be/CMakeLists.txt | 1 + be/cmake/thirdparty.cmake | 6 ++++ be/src/util/block_compression.cpp | 46 ++++++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index e10da917b5be10..58e059ca6ec7d7 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -559,6 +559,7 @@ set(COMMON_THIRDPARTY xml2 lzma simdjson + deflate ) if ((ARCH_AMD64 OR ARCH_AARCH64) AND OS_LINUX) diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake index e8ea8c33308596..493cbb87555ab4 100644 --- a/be/cmake/thirdparty.cmake +++ b/be/cmake/thirdparty.cmake @@ -299,3 +299,9 @@ if (OS_MACOSX) add_library(intl STATIC IMPORTED) set_target_properties(intl PROPERTIES IMPORTED_LOCATION "${THIRDPARTY_DIR}/lib/libintl.a") endif() + +# Only used on x86 or x86_64 +if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86_64") + add_library(deflate STATIC IMPORTED) + set_target_properties(deflate PROPERTIES IMPORTED_LOCATION ${THIRDPARTY_DIR}/lib/libdeflate.a) +endif() diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index fb4c963c11e0f8..3c051c240ef3da 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -20,6 +20,11 @@ #include #include #include +// Only used on x86 or x86_64 +#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \ + defined(__i386) || defined(_M_IX86) +#include +#endif #include #include #include @@ -929,7 +934,7 @@ class ZstdBlockCompression : public BlockCompressionCodec { mutable std::vector _ctx_d_pool; }; -class GzipBlockCompression final : public ZlibBlockCompression { +class GzipBlockCompression : public ZlibBlockCompression { public: static GzipBlockCompression* instance() { static GzipBlockCompression s_instance; @@ -1006,6 +1011,39 @@ class GzipBlockCompression final : public ZlibBlockCompression { const static int MEM_LEVEL = 8; }; +// Only used on x86 or x86_64 +#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \ + defined(__i386) || defined(_M_IX86) +class GzipBlockCompressionByLibdeflate final : public GzipBlockCompression { +public: + GzipBlockCompressionByLibdeflate() : GzipBlockCompression() {} + static GzipBlockCompressionByLibdeflate* instance() { + static GzipBlockCompressionByLibdeflate s_instance; + return &s_instance; + } + ~GzipBlockCompressionByLibdeflate() override = default; + + Status decompress(const Slice& input, Slice* output) override { + if (input.empty()) { + output->size = 0; + return Status::OK(); + } + thread_local std::unique_ptr + decompressor {libdeflate_alloc_decompressor(), libdeflate_free_decompressor}; + if (!decompressor) { + return Status::InternalError("libdeflate_alloc_decompressor error."); + } + std::size_t out_len; + auto result = libdeflate_gzip_decompress(decompressor.get(), input.data, input.size, + output->data, output->size, &out_len); + if (result != LIBDEFLATE_SUCCESS) { + return Status::InternalError("libdeflate_gzip_decompress error, res={}", result); + } + return Status::OK(); + } +}; +#endif + Status get_block_compression_codec(segment_v2::CompressionTypePB type, BlockCompressionCodec** codec) { switch (type) { @@ -1054,7 +1092,13 @@ Status get_block_compression_codec(tparquet::CompressionCodec::type parquet_code *codec = ZstdBlockCompression::instance(); break; case tparquet::CompressionCodec::GZIP: +// Only used on x86 or x86_64 +#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || 
defined(__i386__) || \ + defined(__i386) || defined(_M_IX86) + *codec = GzipBlockCompressionByLibdeflate::instance(); +#else *codec = GzipBlockCompression::instance(); +#endif break; default: return Status::InternalError("unknown compression type({})", parquet_codec); From dbd4012fe7d49151d71df224f1aa88e7d5a7369e Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Fri, 1 Dec 2023 09:51:52 +0800 Subject: [PATCH 34/50] [branch-2.0](fix) Fix broken exception message #27836 --- .../main/java/org/apache/doris/qe/Coordinator.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index cf3175bc72dcf6..a03d88863a88f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -877,7 +877,7 @@ private void waitRpc(List Date: Fri, 1 Dec 2023 10:54:04 +0800 Subject: [PATCH 35/50] [Bug](func) coredump in equal for null in function (#27843) --- be/src/vec/functions/comparison_equal_for_null.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index 20f7e8859a7ebe..d9d058e52ffdab 100644 --- a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -99,7 +99,8 @@ class FunctionEqForNull : public IFunction { SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type); DCHECK(func_eq); temporary_block.insert(ColumnWithTypeAndName {nullptr, return_type, ""}); - func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count); + RETURN_IF_ERROR( + func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count)); if (left_nullable) { auto res_column = std::move(*temporary_block.get_by_position(2).column).mutate(); @@ -127,7 +128,8 @@ class FunctionEqForNull : public IFunction { Block temporary_block(eq_columns); temporary_block.insert(ColumnWithTypeAndName {nullptr, return_type, ""}); - func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count); + RETURN_IF_ERROR( + func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count)); auto res_nullable_column = assert_cast( std::move(*temporary_block.get_by_position(2).column).mutate().get()); From 455fc07e39742ad902910da2741da022ca625423 Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:07:34 +0800 Subject: [PATCH 36/50] [minor](stats) Update olap table row count after analyze (#27858) pick from master #27814 --- .../doris/statistics/TableStatsMeta.java | 25 +++++++--- .../doris/statistics/AnalysisManagerTest.java | 2 +- .../StatisticsAutoCollectorTest.java | 2 +- .../doris/statistics/TableStatsMetaTest.java | 46 +++++++++++++++++++ 4 files changed, 67 insertions(+), 8 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 97a2cd15186f24..04b9e3486b0827 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -18,6 +18,7 @@ package org.apache.doris.statistics; import org.apache.doris.catalog.Column; +import 
org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.io.Text; import org.apache.doris.common.io.Writable; @@ -25,6 +26,7 @@ import org.apache.doris.statistics.AnalysisInfo.JobType; import org.apache.doris.statistics.util.StatisticsUtil; +import com.google.common.annotations.VisibleForTesting; import com.google.gson.annotations.SerializedName; import java.io.DataInput; @@ -54,7 +56,7 @@ public class TableStatsMeta implements Writable { // Used for external table. @SerializedName("rowCount") - public final long rowCount; + public long rowCount; @SerializedName("updateTime") public long updatedTime; @@ -65,6 +67,12 @@ public class TableStatsMeta implements Writable { @SerializedName("trigger") public JobType jobType; + @VisibleForTesting + public TableStatsMeta() { + tblId = 0; + idxId = 0; + } + // It's necessary to store these fields separately from AnalysisInfo, since the lifecycle between AnalysisInfo // and TableStats is quite different. public TableStatsMeta(long rowCount, AnalysisInfo analyzedJob, TableIf table) { @@ -136,11 +144,16 @@ public void update(AnalysisInfo analyzedJob, TableIf tableIf) { } } jobType = analyzedJob.jobType; - if (tableIf != null && analyzedJob.colToPartitions.keySet() - .containsAll(tableIf.getBaseSchema().stream() - .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) - .map(Column::getName).collect(Collectors.toSet()))) { - updatedRows.set(0); + if (tableIf != null) { + if (tableIf instanceof OlapTable) { + rowCount = tableIf.getRowCount(); + } + if (analyzedJob.colToPartitions.keySet() + .containsAll(tableIf.getBaseSchema().stream() + .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType())) + .map(Column::getName).collect(Collectors.toSet()))) { + updatedRows.set(0); + } } } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 20672730e51543..602b637d17d428 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -328,7 +328,7 @@ public void testReAnalyze() { new MockUp() { int count = 0; - int[] rowCount = new int[]{100, 200}; + int[] rowCount = new int[]{100, 100, 200, 200}; final Column c = new Column("col1", PrimitiveType.INT); @Mock diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 732196ef31b861..56475201765230 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -471,7 +471,7 @@ public TableIf findTable(long catalogId, long dbId, long tblId) { new MockUp() { int count = 0; - int[] rowCounts = {100, 0}; + int[] rowCounts = {100, 100, 100, 0, 0, 0, 0}; @Mock public long getRowCount() { return rowCounts[count++]; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java new file mode 100644 index 00000000000000..b5e73ba09da728 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/TableStatsMetaTest.java @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.statistics; + +import org.apache.doris.catalog.OlapTable; + +import mockit.Mock; +import mockit.MockUp; +import mockit.Mocked; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.HashMap; + +class TableStatsMetaTest { + + @Test + void update(@Mocked OlapTable table) { + new MockUp<OlapTable>() { + @Mock + public long getRowCount() { + return 4; + } + }; + TableStatsMeta tableStatsMeta = new TableStatsMeta(); + AnalysisInfo jobInfo = new AnalysisInfoBuilder().setColToPartitions(new HashMap<>()) + .setColName("col1").build(); + tableStatsMeta.update(jobInfo, table); + Assertions.assertEquals(4, tableStatsMeta.rowCount); + } +} From 82ca1c42bbc7bdc10f3543baef1c5c5d52bdffff Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:42:36 +0800 Subject: [PATCH 37/50] [fix](stats)min and max return NaN when table is empty (#27863) Fix the empty-table analyze and min/max null value bugs:
1. Skip empty analyze tasks for sample analyze. (Full analyze tasks are already skipped.)
2. Check that sample rows is not 0 before calculating the scale factor (see the sketch below).
3. Remove ' in the sql template after removing base64 encoding for min/max values.
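For reference, a minimal standalone sketch of the guard behind point 2 (illustrative only; `SampleInfoSketch`, `scaleFactor`, and `chunkSizes` are hypothetical stand-ins for the `getSampleInfo` logic patched below):

```
import java.util.Arrays;
import java.util.List;

// Standalone sketch (not the actual Doris method): the sample scale factor is
// total data size over sampled size, so an empty table must return early
// before the division.
public class SampleInfoSketch {
    static double scaleFactor(List<Long> chunkSizes, long targetBytes) {
        long total = chunkSizes.stream().mapToLong(Long::longValue).sum();
        if (total == 0) {
            return 1.0; // empty table: nothing to sample, keep a neutral factor
        }
        long sampled = Math.max(1, Math.min(targetBytes, total));
        return (double) total / sampled; // safe: sampled >= 1 here
    }

    public static void main(String[] args) {
        System.out.println(scaleFactor(Arrays.asList(), 1000L));          // 1.0
        System.out.println(scaleFactor(Arrays.asList(500L, 500L), 250L)); // 4.0
    }
}
```

The patch below takes the same shape: when the summed chunk sizes are 0, `getSampleInfo` returns a neutral `Pair.of(1.0, 0L)` instead of letting the zero total reach the scale-factor division.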
backport https://github.com/apache/doris/pull/27862 --- .../doris/statistics/BaseAnalysisTask.java | 10 +++--- .../doris/statistics/HMSAnalysisTask.java | 11 ++++--- .../doris/statistics/JdbcAnalysisTask.java | 2 +- .../doris/statistics/OlapAnalysisTask.java | 32 ++++++++++--------- .../apache/doris/statistics/AnalyzeTest.java | 2 +- .../doris/statistics/HMSAnalysisTaskTest.java | 22 +++++++++++++ .../statistics/OlapAnalysisTaskTest.java | 9 ++---- 7 files changed, 56 insertions(+), 32 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 4c0e07ce6cb6e7..449a6fc15e5177 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -92,8 +92,8 @@ public abstract class BaseAnalysisTask { + "${rowCount} AS `row_count`, " + "${ndvFunction} as `ndv`, " + "IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * ${scaleFactor} as `null_count`, " - + "'${min}' AS `min`, " - + "'${max}' AS `max`, " + + "${min} AS `min`, " + + "${max} AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() " + "FROM ( " @@ -115,8 +115,8 @@ public abstract class BaseAnalysisTask { + "${row_count} AS `row_count`, " + "${ndv} AS `ndv`, " + "${null_count} AS `null_count`, " - + "'${min}' AS `min`, " - + "'${max}' AS `max`, " + + "${min} AS `min`, " + + "${max} AS `max`, " + "${data_size} AS `data_size`, " + "NOW() "; @@ -311,7 +311,7 @@ public void setJob(AnalysisJob job) { this.job = job; } - protected void runQuery(String sql, boolean needEncode) { + protected void runQuery(String sql) { long startTime = System.currentTimeMillis(); try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext()) { stmtExecutor = new StmtExecutor(a.connectContext, sql); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java index 7bd540de2c716f..efd99d1eca953d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HMSAnalysisTask.java @@ -159,7 +159,7 @@ private void getOrdinaryColumnStats() throws Exception { } stringSubstitutor = new StringSubstitutor(params); String sql = stringSubstitutor.replace(sb.toString()); - runQuery(sql, true); + runQuery(sql); } // Collect the partition column stats through HMS metadata. @@ -201,12 +201,12 @@ private void getPartitionColumnStats() throws Exception { params.put("row_count", String.valueOf(count)); params.put("ndv", String.valueOf(ndv)); params.put("null_count", String.valueOf(numNulls)); - params.put("min", min); - params.put("max", max); + params.put("min", StatisticsUtil.quote(min)); + params.put("max", StatisticsUtil.quote(max)); params.put("data_size", String.valueOf(dataSize)); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String sql = stringSubstitutor.replace(ANALYZE_PARTITION_COLUMN_TEMPLATE); - runQuery(sql, true); + runQuery(sql); } private String updateMinValue(String currentMin, String value) { @@ -313,6 +313,9 @@ protected Pair getSampleInfo() { for (long size : chunkSizes) { total += size; } + if (total == 0) { + return Pair.of(1.0, 0L); + } // Calculate the sample target size for percent and rows sample. 
if (tableSample.isPercent()) { target = total * tableSample.getSampleValue() / 100; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java index e2e83aa8fa624a..50c437fa8f9945 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/JdbcAnalysisTask.java @@ -110,7 +110,7 @@ private void getColumnStats() throws Exception { params.put("dataSizeFunction", getDataSizeFunction(col, false)); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String sql = stringSubstitutor.replace(sb.toString()); - runQuery(sql, true); + runQuery(sql); } private Map<String, String> buildTableStatsParams(String partId) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 45ccdbb0d684d3..bf144b1a1160f4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -59,7 +59,13 @@ public OlapAnalysisTask(AnalysisInfo info) { } public void doExecute() throws Exception { - + Set<String> partitionNames = info.colToPartitions.get(info.colName); + if (partitionNames.isEmpty()) { + LOG.debug("Skip empty partition task for column {} in {}.{}.{}", + info.colName, info.catalogId, info.dbId, info.tblId); + job.appendBuf(this, Collections.emptyList()); + return; + } if (tableSample != null) { doSample(); } else { @@ -113,24 +119,25 @@ protected void doSample() throws Exception { params.put("scaleFactor", String.valueOf(scaleFactor)); params.put("sampleHints", tabletStr.isEmpty() ? "" : String.format("TABLET(%s)", tabletStr)); params.put("ndvFunction", getNdvFunction(String.valueOf(rowCount))); - params.put("min", min); - params.put("max", max); + params.put("min", StatisticsUtil.quote(min)); + params.put("max", StatisticsUtil.quote(max)); params.put("rowCount", String.valueOf(rowCount)); params.put("type", col.getType().toString()); params.put("limit", ""); if (needLimit()) { // If the tablets to be sampled are too large, use limit to control the rows to read, and re-calculate // the scaleFactor. - limitFlag = true; rowsToSample = Math.min(getSampleRows(), pair.second); - params.put("limit", "limit " + rowsToSample); - params.put("scaleFactor", String.valueOf(scaleFactor * (double) pair.second / rowsToSample)); + // Empty table doesn't need a limit. + if (rowsToSample > 0) { + limitFlag = true; + params.put("limit", "limit " + rowsToSample); + params.put("scaleFactor", String.valueOf(scaleFactor * (double) pair.second / rowsToSample)); + } } StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String sql; if (useLinearAnalyzeTemplate()) { - params.put("min", StatisticsUtil.quote(min)); - params.put("max", StatisticsUtil.quote(max)); // For single unique key, use count as ndv.
if (isSingleUniqueKey()) { params.put("ndvFunction", String.valueOf(rowCount)); @@ -148,7 +155,7 @@ protected void doSample() throws Exception { col.getName(), params.get("rowCount"), rowsToSample, params.get("scaleFactor"), limitFlag, tbl.isDistributionColumn(col.getName()), tbl.isPartitionColumn(col.getName()), col.isKey(), isSingleUniqueKey()); - runQuery(sql, false); + runQuery(sql); } } @@ -169,11 +176,6 @@ protected ResultRow collectBasicStat(AutoCloseConnectContext context) { */ protected void doFull() throws Exception { LOG.debug("Will do full collection for column {}", col.getName()); - Set partitionNames = info.colToPartitions.get(info.colName); - if (partitionNames.isEmpty()) { - job.appendBuf(this, Collections.emptyList()); - return; - } Map params = new HashMap<>(); params.put("internalDB", FeConstants.INTERNAL_DB_NAME); params.put("columnStatTbl", StatisticConstants.STATISTIC_TBL_NAME); @@ -189,7 +191,7 @@ protected void doFull() throws Exception { params.put("tblName", String.valueOf(tbl.getName())); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); String collectColStats = stringSubstitutor.replace(COLLECT_COL_STATISTICS); - runQuery(collectColStats, true); + runQuery(collectColStats); } // Get sample tablets id and scale up scaleFactor diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java index 1487d19246649d..9cebb3283d5935 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalyzeTest.java @@ -157,7 +157,7 @@ public void execSQLs(List partitionAnalysisSQLs, Map par new MockUp() { @Mock - protected void runQuery(String sql, boolean needEncode) {} + protected void runQuery(String sql) {} }; HashMap> colToPartitions = Maps.newHashMap(); colToPartitions.put("col1", Collections.singleton("t1")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java index 24a74053bb6619..a569a5cb06d9dc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/HMSAnalysisTaskTest.java @@ -21,6 +21,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.external.HMSExternalTable; +import org.apache.doris.common.Pair; import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.collect.Lists; @@ -138,4 +139,25 @@ public long getDataSize(boolean singleReplica) { Assertions.assertEquals(1000, tableSample.getSampleValue()); } + @Test + public void testGetSampleInfo(@Mocked HMSExternalTable tableIf) + throws Exception { + new MockUp() { + @Mock + public List getChunkSizes() { + return Lists.newArrayList(); + } + }; + HMSAnalysisTask task = new HMSAnalysisTask(); + task.setTable(tableIf); + task.tableSample = null; + Pair info1 = task.getSampleInfo(); + Assertions.assertEquals(1.0, info1.first); + Assertions.assertEquals(0, info1.second); + task.tableSample = new TableSample(false, 100L); + Pair info2 = task.getSampleInfo(); + Assertions.assertEquals(1.0, info2.first); + Assertions.assertEquals(0, info2.second); + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 
7b7894e54b44e1..ed9122f70b6560 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -149,8 +149,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { } @Mock - public void runQuery(String sql, boolean needEncode) { - Assertions.assertFalse(needEncode); + public void runQuery(String sql) { Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } @@ -216,8 +215,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { } @Mock - public void runQuery(String sql, boolean needEncode) { - Assertions.assertFalse(needEncode); + public void runQuery(String sql) { Assertions.assertEquals(" SELECT CONCAT(30001, '-', -1, '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, ROUND(NDV(`${colName}`) * 5.0) as `ndv`, ROUND(SUM(CASE WHEN `${colName}` IS NULL THEN 1 ELSE 0 END) * 5.0) AS `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`${colName}`)) * 5.0 AS `data_size`, NOW() FROM `catalogName`.`${dbName}`.`${tblName}` limit 100", sql); return; } @@ -290,8 +288,7 @@ public ResultRow collectBasicStat(AutoCloseConnectContext context) { } @Mock - public void runQuery(String sql, boolean needEncode) { - Assertions.assertFalse(needEncode); + public void runQuery(String sql) { Assertions.assertEquals("SELECT CONCAT('30001', '-', '-1', '-', 'null') AS `id`, 10001 AS `catalog_id`, 20001 AS `db_id`, 30001 AS `tbl_id`, -1 AS `idx_id`, 'null' AS `col_id`, NULL AS `part_id`, 500 AS `row_count`, SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 500) as `ndv`, IFNULL(SUM(IF(`t1`.`column_key` IS NULL, `t1`.`count`, 0)), 0) * 5.0 as `null_count`, '1' AS `min`, '2' AS `max`, SUM(LENGTH(`column_key`) * count) * 5.0 AS `data_size`, NOW() FROM ( SELECT t0.`${colName}` as `column_key`, COUNT(1) as `count` FROM (SELECT `${colName}` FROM `catalogName`.`${dbName}`.`${tblName}` limit 100) as `t0` GROUP BY `t0`.`${colName}` ) as `t1` ", sql); return; } From d3ecff96320075be1b1759ad4fb216d509921bf9 Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Fri, 1 Dec 2023 16:55:19 +0800 Subject: [PATCH 38/50] [minor](stats) Throw error when sync analyze failed (#27846) pick from master #27845 --- .../apache/doris/statistics/AnalysisJob.java | 25 ++++++------------- .../doris/statistics/BaseAnalysisTask.java | 4 +-- .../doris/statistics/StatisticsCleaner.java | 17 ++++++++++--- .../doris/statistics/AnalysisJobTest.java | 4 +-- 4 files changed, 24 insertions(+), 26 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java 
b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java index 5b1ca430409b91..b5dc2cceb9bbeb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java @@ -119,7 +119,7 @@ protected void writeBuf() { if (killed) { return; } - // buf could be empty when nothing need to do, for example user submit an analysis task for table with no data + // buf could be empty when nothing need to do, for example user submit an analysis task for table with no data // change if (!buf.isEmpty()) { String insertStmt = "INSERT INTO " + StatisticConstants.FULL_QUALIFIED_STATS_TBL_NAME + " VALUES "; @@ -128,28 +128,17 @@ values.add(data.toSQL(true)); } insertStmt += values.toString(); - int retryTimes = 0; - while (retryTimes < StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { - if (killed) { - return; - } - try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext(false)) { - stmtExecutor = new StmtExecutor(r.connectContext, insertStmt); - executeWithExceptionOnFail(stmtExecutor); - break; - } catch (Exception t) { - LOG.warn("Failed to write buf: " + insertStmt, t); - retryTimes++; - if (retryTimes >= StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { - updateTaskState(AnalysisState.FAILED, t.getMessage()); - return; - } - } + try (AutoCloseConnectContext r = StatisticsUtil.buildConnectContext(false)) { + stmtExecutor = new StmtExecutor(r.connectContext, insertStmt); + executeWithExceptionOnFail(stmtExecutor); + } catch (Exception t) { + throw new RuntimeException("Failed to analyze: " + t.getMessage()); } } updateTaskState(AnalysisState.FINISHED, ""); syncLoadStats(); queryFinished.clear(); + buf.clear(); } protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exception { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index 449a6fc15e5177..cdd5c3fc69f139 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -181,7 +181,7 @@ protected void prepareExecution() { protected void executeWithRetry() { int retriedTimes = 0; - while (retriedTimes <= StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { + while (retriedTimes < StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { if (killed) { break; } @@ -193,7 +193,7 @@ throw new RuntimeException(t); } LOG.warn("Failed to execute analysis task, retried times: {}", retriedTimes++, t); - if (retriedTimes > StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { + if (retriedTimes >= StatisticConstants.ANALYZE_TASK_RETRY_TIMES) { job.taskFailed(this, t.getMessage()); throw new RuntimeException(t); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java index 88fa098e57bb86..21deb44c0a43d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCleaner.java @@ -75,11 +75,20 @@ protected void runAfterCatalogReady() { } public synchronized void clear() { - if (!init()) { - return; + try { + if (!init()) { + return; + } + clearStats(colStatsTbl); + clearStats(histStatsTbl); + } finally { + colStatsTbl = null; + histStatsTbl = null; + idToCatalog = null;
+ idToDb = null; + idToTbl = null; + idToMVIdx = null; } - clearStats(colStatsTbl); - clearStats(histStatsTbl); } private void clearStats(OlapTable statsTbl) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java index bca05d8299c020..255ab7106aa2de 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisJobTest.java @@ -206,7 +206,7 @@ public void updateTaskState(AnalysisState state, String msg) { @Mock protected void executeWithExceptionOnFail(StmtExecutor stmtExecutor) throws Exception { - throw new RuntimeException(); + // DO NOTHING } @Mock @@ -218,7 +218,7 @@ protected void syncLoadStats() { job.queryFinished = new HashSet<>(); job.queryFinished.add(task2); job.writeBuf(); - Assertions.assertEquals(1, job.queryFinished.size()); + Assertions.assertEquals(0, job.queryFinished.size()); } } From 3135f5f5c2d6ae6ae51883dcbb9468da165c330d Mon Sep 17 00:00:00 2001 From: AKIRA <33112463+Kikyou1997@users.noreply.github.com> Date: Fri, 1 Dec 2023 19:54:07 +0800 Subject: [PATCH 39/50] [fix](stats) Don't save colToPartitions anymore to save mem (#27880) pick from master #27879 --- .../org/apache/doris/statistics/AnalysisInfo.java | 2 -- .../org/apache/doris/statistics/AnalysisJob.java | 12 ++++++++++++ .../org/apache/doris/statistics/AnalysisManager.java | 10 ++++++++++ .../apache/doris/statistics/StatisticConstants.java | 2 ++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java index 248714b27f78a2..4a10e921efbcf1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java @@ -97,10 +97,8 @@ public enum ScheduleType { public final long tblId; // TODO: Map here is wired, List is enough - @SerializedName("colToPartitions") public final Map> colToPartitions; - @SerializedName("partitionNames") public final Set partitionNames; @SerializedName("colName") diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java index b5dc2cceb9bbeb..f8f6cda259122c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java @@ -179,6 +179,18 @@ public void cancel() { public void deregisterJob() { analysisManager.removeJob(jobInfo.jobId); analysisManager.analysisJobIdToTaskMap.remove(jobInfo.jobId); + for (BaseAnalysisTask task : queryingTask) { + task.info.colToPartitions.clear(); + if (task.info.partitionNames != null) { + task.info.partitionNames.clear(); + } + } + for (BaseAnalysisTask task : queryFinished) { + task.info.colToPartitions.clear(); + if (task.info.partitionNames != null) { + task.info.partitionNames.clear(); + } + } } protected void syncLoadStats() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 88890c21d6e061..24618fd38ca662 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -613,6 +613,10 @@ public void 
updateTableStats(AnalysisInfo jobInfo) { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); } + jobInfo.colToPartitions.clear(); + if (jobInfo.partitionNames != null) { + jobInfo.partitionNames.clear(); + } } public List showAnalysisJob(ShowAnalyzeStmt stmt) { @@ -792,6 +796,9 @@ public void replayCreateAnalysisJob(AnalysisInfo jobInfo) { while (analysisJobInfoMap.size() >= Config.analyze_record_limit) { analysisJobInfoMap.remove(analysisJobInfoMap.pollFirstEntry().getKey()); } + if (jobInfo.message != null && jobInfo.message.length() >= StatisticConstants.MSG_LEN_UPPER_BOUND) { + jobInfo.message = jobInfo.message.substring(0, StatisticConstants.MSG_LEN_UPPER_BOUND); + } this.analysisJobInfoMap.put(jobInfo.jobId, jobInfo); } @@ -799,6 +806,9 @@ public void replayCreateAnalysisTask(AnalysisInfo taskInfo) { while (analysisTaskInfoMap.size() >= Config.analyze_record_limit) { analysisTaskInfoMap.remove(analysisTaskInfoMap.pollFirstEntry().getKey()); } + if (taskInfo.message != null && taskInfo.message.length() >= StatisticConstants.MSG_LEN_UPPER_BOUND) { + taskInfo.message = taskInfo.message.substring(0, StatisticConstants.MSG_LEN_UPPER_BOUND); + } this.analysisTaskInfoMap.put(taskInfo.taskId, taskInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index 3e2b9c8bc287d5..695edae386467c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -100,6 +100,8 @@ public class StatisticConstants { public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 70; + public static final int MSG_LEN_UPPER_BOUND = 1024; + static { SYSTEM_DBS.add(SystemInfoService.DEFAULT_CLUSTER + ClusterNamespace.CLUSTER_DELIMITER + FeConstants.INTERNAL_DB_NAME); From b51dc708268a88eed8e7a463c72381cb04c140ca Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Fri, 1 Dec 2023 20:05:21 +0800 Subject: [PATCH 40/50] [fix](nereids) set operation's result type is wrong if decimal overflows (#27872) pick from master #27870 --- .../plans/logical/LogicalSetOperation.java | 32 ++++++++++++++++--- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java index c58b0aa0688a94..1835363bce826b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSetOperation.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.plans.logical; +import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.properties.LogicalProperties; @@ -137,10 +138,7 @@ private List> castCommonDataTypeOutputs() { for (int i = 0; i < child(0).getOutput().size(); ++i) { Slot left = child(0).getOutput().get(i); Slot right = child(1).getOutput().get(i); - DataType compatibleType = DataType.fromCatalogType(Type.getAssignmentCompatibleType( - left.getDataType().toCatalogDataType(), - right.getDataType().toCatalogDataType(), - false)); + DataType compatibleType = getAssignmentCompatibleType(left.getDataType(), right.getDataType()); 
Expression newLeft = TypeCoercionUtils.castIfNotSameType(left, compatibleType); Expression newRight = TypeCoercionUtils.castIfNotSameType(right, compatibleType); if (newLeft instanceof Cast) { @@ -211,4 +209,30 @@ public abstract LogicalSetOperation withChildrenAndTheirOutputs( public int getArity() { return children.size(); } + + private DataType getAssignmentCompatibleType(DataType left, DataType right) { + if (left.isNullType()) { + return right; + } + if (right.isNullType()) { + return left; + } + if (left.equals(right)) { + return left; + } + Type resultType = Type.getAssignmentCompatibleType(left.toCatalogDataType(), + right.toCatalogDataType(), false); + if (resultType.isDecimalV3()) { + int oldPrecision = resultType.getPrecision(); + int oldScale = resultType.getDecimalDigits(); + int integerPart = oldPrecision - oldScale; + int maxPrecision = ScalarType.MAX_DECIMAL128_PRECISION; + if (oldPrecision > maxPrecision) { + int newScale = maxPrecision - integerPart; + resultType = + ScalarType.createDecimalType(maxPrecision, newScale < 0 ? 0 : newScale); + } + } + return DataType.fromCatalogType(resultType); + } } From 4890d40ac0c27a635b456ffbff7c2a61516a5b02 Mon Sep 17 00:00:00 2001 From: Lightman <31928846+Lchangliang@users.noreply.github.com> Date: Fri, 1 Dec 2023 20:17:51 +0800 Subject: [PATCH 41/50] [Config] Modify the default value of tablet_schema_cache_recycle_interval (#27877) --- be/src/common/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index b87c19a179b4b3..c44249da760084 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1086,7 +1086,7 @@ DEFINE_Int32(group_commit_insert_threads, "10"); DEFINE_mInt32(scan_thread_nice_value, "0"); -DEFINE_mInt32(tablet_schema_cache_recycle_interval, "86400"); +DEFINE_mInt32(tablet_schema_cache_recycle_interval, "3600"); DEFINE_Bool(exit_on_exception, "false"); From 175868d2aadb2147fdabe5e6f54eaf84edcb09b4 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Fri, 1 Dec 2023 20:21:06 +0800 Subject: [PATCH 42/50] [fix](like_func) incorrect result of like with 'NO_BACKSLASH_ESCAPES' mode(#27842) (#27851) --- be/src/vec/functions/like.cpp | 43 +++++++++------- fe/fe-core/src/main/jflex/sql_scanner.flex | 10 ++-- .../test_like_no_backslash_escapes_mode.out | 7 +++ ...test_like_no_backslash_escapes_mode.groovy | 50 +++++++++++++++++++ 4 files changed, 89 insertions(+), 21 deletions(-) create mode 100644 regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out create mode 100644 regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index 8851b777fee6e6..add09f845a62aa 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -453,24 +453,30 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin bool is_escaped = false; for (size_t i = 0; i < pattern.size(); ++i) { - if (!is_escaped && pattern[i] == '%') { - re_pattern->append(".*"); - } else if (!is_escaped && pattern[i] == '_') { - re_pattern->append("."); - // check for escape char before checking for regex special chars, they might overlap - } else if (!is_escaped && pattern[i] == state->escape_char) { - is_escaped = true; - } else if (pattern[i] == '.' 
|| pattern[i] == '[' || pattern[i] == ']' || - pattern[i] == '{' || pattern[i] == '}' || pattern[i] == '(' || - pattern[i] == ')' || pattern[i] == '\\' || pattern[i] == '*' || - pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' || - pattern[i] == '^' || pattern[i] == '$') { - // escape all regex special characters; see list at - re_pattern->append("\\"); - re_pattern->append(1, pattern[i]); - is_escaped = false; + if (!is_escaped) { + switch (pattern[i]) { + case '%': + re_pattern->append(".*"); + break; + case '_': + re_pattern->append("."); + break; + default: + is_escaped = pattern[i] == state->escape_char; + if (!is_escaped) { + re_pattern->append(1, pattern[i]); + } + break; + } } else { - // regular character or escaped special character + if (pattern[i] == '.' || pattern[i] == '[' || pattern[i] == ']' || pattern[i] == '{' || + pattern[i] == '}' || pattern[i] == '(' || pattern[i] == ')' || pattern[i] == '\\' || + pattern[i] == '*' || pattern[i] == '+' || pattern[i] == '?' || pattern[i] == '|' || + pattern[i] == '^' || pattern[i] == '$') { + re_pattern->append("\\"); + } else if (pattern[i] != '%' && pattern[i] != '_') { + re_pattern->append("\\\\"); + } re_pattern->append(1, pattern[i]); is_escaped = false; } @@ -634,7 +640,8 @@ Status FunctionLike::open(FunctionContext* context, FunctionContext::FunctionSta opts.set_dot_nl(true); state->search_state.regex = std::make_unique(re_pattern, opts); if (!state->search_state.regex->ok()) { - return Status::InternalError("Invalid regex expression: {}", pattern_str); + return Status::InternalError("Invalid regex expression: {}(origin: {})", + re_pattern, pattern_str); } } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index 3fcab4325184e4..ecf15f3d61d392 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -573,7 +573,11 @@ import org.apache.doris.qe.SqlModeHelper; return new Symbol(id, yyline+1, yycolumn+1, value); } - private static String escapeBackSlash(String str) { + private static String escapeBackSlash(String str, long sqlMode) { + if ((sqlMode & SqlModeHelper.MODE_NO_BACKSLASH_ESCAPES) != 0) { + return str; + } + StringWriter writer = new StringWriter(); int strLen = str.length(); for (int i = 0; i < strLen; ++i) { @@ -733,12 +737,12 @@ EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminato {SingleQuoteStringLiteral} { return newToken(SqlParserSymbols.STRING_LITERAL, - escapeBackSlash(yytext().substring(1, yytext().length()-1)).replaceAll("''", "'")); + escapeBackSlash(yytext().substring(1, yytext().length()-1), sql_mode).replaceAll("''", "'")); } {DoubleQuoteStringLiteral} { return newToken(SqlParserSymbols.STRING_LITERAL, - escapeBackSlash(yytext().substring(1, yytext().length()-1)).replaceAll("\"\"", "\"")); + escapeBackSlash(yytext().substring(1, yytext().length()-1), sql_mode).replaceAll("\"\"", "\"")); } {CommentedHintBegin} { diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out b/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out new file mode 100644 index 00000000000000..13540d6ee2d122 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select1 -- +1 TIN\\PEXNB601C6UUTAB + +-- !select2 -- +1 TIN\\PEXNB601C6UUTAB + diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy new file mode 100644 index 00000000000000..43a51fb31b78ea --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_like_no_backslash_escapes_mode.groovy @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_like_no_backslash_escapes_mode") { + + sql """ set sql_mode = "NO_BACKSLASH_ESCAPES"; """ + def tbName = "test_like_no_backslash_escapes_mode_tbl" + sql "DROP TABLE IF EXISTS ${tbName}" + + sql """ + CREATE TABLE `${tbName}` ( + `id` INT NULL, + `value` VARCHAR(100) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + INSERT INTO ${tbName} VALUES (1, "TIN\\PEXNB601C6UUTAB"); + """ + + qt_select1 """ + select * from ${tbName} where `value` like "%TIN\\PE%"; + """ + + qt_select2 """ + select * from ${tbName} where `value` = "TIN\\PEXNB601C6UUTAB"; + """ + + // sql "DROP TABLE ${tbName};" +} \ No newline at end of file From b1fbdfd7e256192b244ac260b1819cee4384dccc Mon Sep 17 00:00:00 2001 From: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com> Date: Fri, 1 Dec 2023 22:28:17 +0800 Subject: [PATCH 43/50] [fix](fe) Fix show frontends npt in some situations (#27295) (#27789) ``` java.lang.NullPointerException: null at com.sleepycat.je.rep.util.ReplicationGroupAdmin.getMasterSocket(ReplicationGroupAdmin.java:191) at com.sleepycat.je.rep.util.ReplicationGroupAdmin.doMessageExchange(ReplicationGroupAdmin.java:607) at com.sleepycat.je.rep.util.ReplicationGroupAdmin.getGroup(ReplicationGroupAdmin.java:406) at org.apache.doris.ha.BDBHA.getElectableNodes(BDBHA.java:132) at org.apache.doris.common.proc.FrontendsProcNode.getFrontendsInfo(FrontendsProcNode.java:84) at org.apache.doris.qe.ShowExecutor.handleShowFrontends(ShowExecutor.java:1923) at org.apache.doris.qe.ShowExecutor.execute(ShowExecutor.java:355) at org.apache.doris.qe.StmtExecutor.handleShow(StmtExecutor.java:2113) ... 
``` --- .../apache/doris/common/proc/FrontendsProcNode.java | 1 - .../src/main/java/org/apache/doris/ha/BDBHA.java | 10 ++++++---- .../org/apache/doris/journal/bdbje/BDBEnvironment.java | 5 +++++ 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/proc/FrontendsProcNode.java b/fe/fe-core/src/main/java/org/apache/doris/common/proc/FrontendsProcNode.java index a129bbd3386ce7..a88315ee858f00 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/proc/FrontendsProcNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/proc/FrontendsProcNode.java @@ -93,7 +93,6 @@ public static void getFrontendsInfo(Env env, List> infos) { } for (Frontend fe : env.getFrontends(null /* all */)) { - List info = new ArrayList(); info.add(fe.getNodeName()); info.add(fe.getHost()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/ha/BDBHA.java b/fe/fe-core/src/main/java/org/apache/doris/ha/BDBHA.java index 40de73292224ad..046bca5bac913b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/ha/BDBHA.java +++ b/fe/fe-core/src/main/java/org/apache/doris/ha/BDBHA.java @@ -104,11 +104,12 @@ public boolean fencing() { @Override public List getObserverNodes() { + List ret = new ArrayList(); ReplicationGroupAdmin replicationGroupAdmin = environment.getReplicationGroupAdmin(); if (replicationGroupAdmin == null) { - return null; + return ret; } - List ret = new ArrayList(); + try { ReplicationGroup replicationGroup = replicationGroupAdmin.getGroup(); for (ReplicationNode replicationNode : replicationGroup.getSecondaryNodes()) { @@ -123,11 +124,12 @@ public List getObserverNodes() { @Override public List getElectableNodes(boolean leaderIncluded) { + List ret = new ArrayList(); ReplicationGroupAdmin replicationGroupAdmin = environment.getReplicationGroupAdmin(); if (replicationGroupAdmin == null) { - return null; + return ret; } - List ret = new ArrayList(); + try { ReplicationGroup replicationGroup = replicationGroupAdmin.getGroup(); for (ReplicationNode replicationNode : replicationGroup.getElectableNodes()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java index 650cdf98ca79c2..47405c5c50e6cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBEnvironment.java @@ -212,6 +212,11 @@ public ReplicationGroupAdmin getReplicationGroupAdmin() { .filter(Frontend::isAlive) .map(fe -> new InetSocketAddress(fe.getHost(), fe.getEditLogPort())) .collect(Collectors.toSet()); + + if (addresses.isEmpty()) { + LOG.info("addresses is empty"); + return null; + } return new ReplicationGroupAdmin(PALO_JOURNAL_GROUP, addresses); } From 3a6efdb10512e21913e0df7ae31be22587338792 Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Fri, 1 Dec 2023 22:28:59 +0800 Subject: [PATCH 44/50] [branch-2.0](fix) Fix extremely high CPU usage caused by rf merge #27894 (#27895) --- be/src/runtime/runtime_filter_mgr.cpp | 7 +++---- be/src/runtime/runtime_filter_mgr.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index abfa40847014a9..73fcae57fcef27 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -39,7 +39,6 @@ #include "runtime/runtime_state.h" #include "runtime/thread_context.h" #include "util/brpc_client_cache.h" 
-#include "util/spinlock.h" namespace doris { @@ -227,7 +226,7 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( auto filter_id = runtime_filter_desc->filter_id; // LOG(INFO) << "entity filter id:" << filter_id; cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options, -1, false); - _filter_map.emplace(filter_id, CntlValwithLock {cntVal, std::make_unique()}); + _filter_map.emplace(filter_id, CntlValwithLock {cntVal, std::make_unique()}); return Status::OK(); } @@ -249,7 +248,7 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( auto filter_id = runtime_filter_desc->filter_id; // LOG(INFO) << "entity filter id:" << filter_id; cntVal->filter->init_with_desc(&cntVal->runtime_filter_desc, query_options); - _filter_map.emplace(filter_id, CntlValwithLock {cntVal, std::make_unique()}); + _filter_map.emplace(filter_id, CntlValwithLock {cntVal, std::make_unique()}); return Status::OK(); } @@ -323,7 +322,7 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ // iter->second = pair{CntlVal,SpinLock} cntVal = iter->second.first; { - std::lock_guard l(*iter->second.second); + std::lock_guard l(*iter->second.second); MergeRuntimeFilterParams params(request, attach_data); ObjectPool* pool = cntVal->pool.get(); RuntimeFilterWrapperHolder holder; diff --git a/be/src/runtime/runtime_filter_mgr.h b/be/src/runtime/runtime_filter_mgr.h index 2048229cd2e928..187fa374526611 100644 --- a/be/src/runtime/runtime_filter_mgr.h +++ b/be/src/runtime/runtime_filter_mgr.h @@ -170,7 +170,7 @@ class RuntimeFilterMergeControllerEntity { std::shared_mutex _filter_map_mutex; std::shared_ptr _mem_tracker; using CntlValwithLock = - std::pair, std::unique_ptr>; + std::pair, std::unique_ptr>; std::map _filter_map; RuntimeState* _state; bool _opt_remote_rf = true; From 1e14a921878b445e22f9387e529949704ad5553c Mon Sep 17 00:00:00 2001 From: Kang Date: Sat, 2 Dec 2023 07:42:07 +0800 Subject: [PATCH 45/50] [fix](stacktrace) ignore stacktrace for error code INVALID_ARGUMENT INVERTED_INDEX_NOT_IMPLEMENTED (#27898) * ignore stacktrace for error INVALID_ARGUMENT INVERTED_INDEX_NOT_IMPLEMENTED * AndBlockColumnPredicate::evaluate --- be/src/common/status.h | 9 ++++++--- be/src/olap/block_column_predicate.cpp | 2 +- be/src/olap/block_column_predicate.h | 2 +- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/be/src/common/status.h b/be/src/common/status.h index 88981fe808b329..e0c413db6f8581 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -272,9 +272,10 @@ E(INVERTED_INDEX_NO_TERMS, -6005); E(INVERTED_INDEX_RENAME_FILE_FAILED, -6006); E(INVERTED_INDEX_EVALUATE_SKIPPED, -6007); E(INVERTED_INDEX_BUILD_WAITTING, -6008); -E(KEY_NOT_FOUND, -6009); -E(KEY_ALREADY_EXISTS, -6010); -E(ENTRY_NOT_FOUND, -6011); +E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009); +E(KEY_NOT_FOUND, -7000); +E(KEY_ALREADY_EXISTS, -7001); +E(ENTRY_NOT_FOUND, -7002); #undef E } // namespace ErrorCode @@ -300,6 +301,7 @@ constexpr bool capture_stacktrace(int code) { && code != ErrorCode::SEGCOMPACTION_INIT_READER && code != ErrorCode::SEGCOMPACTION_INIT_WRITER && code != ErrorCode::SEGCOMPACTION_FAILED + && code != ErrorCode::INVALID_ARGUMENT && code != ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS && code != ErrorCode::INVERTED_INDEX_NOT_SUPPORTED && code != ErrorCode::INVERTED_INDEX_CLUCENE_ERROR @@ -308,6 +310,7 @@ constexpr bool capture_stacktrace(int code) { && code != ErrorCode::INVERTED_INDEX_NO_TERMS && code != 
ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED && code != ErrorCode::INVERTED_INDEX_BUILD_WAITTING + && code != ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED && code != ErrorCode::META_KEY_NOT_FOUND && code != ErrorCode::PUSH_VERSION_ALREADY_EXIST && code != ErrorCode::VERSION_NOT_EXIST diff --git a/be/src/olap/block_column_predicate.cpp b/be/src/olap/block_column_predicate.cpp index 8cfb89363cdd24..13e7da1a6f9f90 100644 --- a/be/src/olap/block_column_predicate.cpp +++ b/be/src/olap/block_column_predicate.cpp @@ -216,7 +216,7 @@ void AndBlockColumnPredicate::evaluate_vec(vectorized::MutableColumns& block, ui Status AndBlockColumnPredicate::evaluate(const std::string& column_name, InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const { - return Status::NotSupported( + return Status::Error( "Not Implemented evaluate with inverted index, please check the predicate"); } diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h index b29fad56278749..62a6b4353e2f78 100644 --- a/be/src/olap/block_column_predicate.h +++ b/be/src/olap/block_column_predicate.h @@ -94,7 +94,7 @@ class BlockColumnPredicate { //evaluate predicate on inverted virtual Status evaluate(const std::string& column_name, InvertedIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* bitmap) const { - return Status::NotSupported( + return Status::Error( "Not Implemented evaluate with inverted index, please check the predicate"); } }; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 0d325e351a2d84..972f3f967e366d 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1028,7 +1028,7 @@ Status SegmentIterator::_apply_inverted_index_on_block_column_predicate( return res; } else { //TODO:mock until AndBlockColumnPredicate evaluate is ok. 
- if (res.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) { + if (res.code() == ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED) { return Status::OK(); } LOG(WARNING) << "failed to evaluate index" From ab33103cce0f2f43cbd77ac89aad7d42bf530c3c Mon Sep 17 00:00:00 2001 From: minghong Date: Sat, 2 Dec 2023 18:03:31 +0800 Subject: [PATCH 46/50] [opt](nereids) Branch-2.0: remove partition & histogram from col stats to reduce memory usage #27885 (#27896) --- .../nereids/stats/ExpressionEstimation.java | 4 +- .../doris/nereids/stats/FilterEstimation.java | 113 ------------------ .../doris/nereids/stats/StatsCalculator.java | 22 ---- .../doris/statistics/ColumnStatistic.java | 36 +----- .../statistics/ColumnStatisticBuilder.java | 37 +----- .../doris/statistics/StatisticsCache.java | 43 ------- .../doris/nereids/util/HyperGraphBuilder.java | 4 +- .../apache/doris/statistics/CacheTest.java | 2 +- .../statistics/StatsDeriveResultTest.java | 4 +- 9 files changed, 12 insertions(+), 253 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index aa1903e7b37ebb..f231126417ee04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -417,7 +417,6 @@ public ColumnStatistic visitComparisonPredicate(ComparisonPredicate cp, Statisti ColumnStatistic rightStats = cp.right().accept(this, context); return new ColumnStatisticBuilder(leftStats) .setNumNulls(StatsMathUtil.maxNonNaN(leftStats.numNulls, rightStats.numNulls)) - .setHistogram(null) .setNdv(2).build(); } @@ -430,7 +429,7 @@ public ColumnStatistic visitCompoundPredicate(CompoundPredicate compoundPredicat ColumnStatistic columnStatistic = childExprs.get(i).accept(this, context); maxNull = StatsMathUtil.maxNonNaN(maxNull, columnStatistic.numNulls); } - return new ColumnStatisticBuilder(firstChild).setNumNulls(maxNull).setNdv(2).setHistogram(null).build(); + return new ColumnStatisticBuilder(firstChild).setNumNulls(maxNull).setNdv(2).build(); } @Override @@ -707,7 +706,6 @@ public ColumnStatistic visitRandom(Random random, Statistics context) { .setMinValue(0) .setMaxValue(1) .setNumNulls(0) - .setHistogram(null) .setAvgSizeByte(random.getDataType().width()) .setDataSize(random.getDataType().width() * context.getRowCount()).build(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index a412ff375fd63e..0a7d6b70c39e3d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -40,11 +40,8 @@ import org.apache.doris.nereids.trees.expressions.functions.Function; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.statistics.Bucket; import org.apache.doris.statistics.ColumnStatistic; import org.apache.doris.statistics.ColumnStatisticBuilder; -import org.apache.doris.statistics.Histogram; -import org.apache.doris.statistics.HistogramBuilder; import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; @@ -52,7 +49,6 @@ import com.google.common.base.Preconditions; 
import com.google.common.collect.Sets; -import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.function.Predicate; @@ -180,10 +176,6 @@ public Statistics visitComparisonPredicate(ComparisonPredicate cp, EstimationCon private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context, boolean contains) { - if (statsForLeft.hasHistogram()) { - return estimateLessThanLiteralWithHistogram(leftExpr, statsForLeft, - statsForRight.maxValue, context, contains); - } StatisticRange rightRange = new StatisticRange(statsForLeft.minValue, statsForLeft.minExpr, statsForRight.maxValue, statsForRight.maxExpr, statsForLeft.ndv, leftExpr.getDataType()); @@ -194,10 +186,6 @@ private Statistics updateLessThanLiteral(Expression leftExpr, ColumnStatistic st private Statistics updateGreaterThanLiteral(Expression leftExpr, ColumnStatistic statsForLeft, ColumnStatistic statsForRight, EstimationContext context, boolean contains) { - if (statsForLeft.hasHistogram()) { - return estimateGreaterThanLiteralWithHistogram(leftExpr, statsForLeft, - statsForRight.minValue, context, contains); - } StatisticRange rightRange = new StatisticRange(statsForRight.minValue, statsForRight.minExpr, statsForLeft.maxValue, statsForLeft.maxExpr, statsForLeft.ndv, leftExpr.getDataType()); @@ -237,10 +225,6 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats } else { selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); } - if (statsForLeft.hasHistogram()) { - return estimateEqualToWithHistogram(cp.left(), statsForLeft, val, context); - } - Statistics equalStats = context.statistics.withSel(selectivity); Expression left = cp.left(); equalStats.addColumnStats(left, statsForRight); @@ -569,103 +553,6 @@ private Statistics estimateColumnLessThanColumn(Expression leftExpr, ColumnStati .addColumnStats(rightExpr, rightColumnStatistic); } - private Statistics estimateLessThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats, - double numVal, EstimationContext context, boolean contains) { - Histogram leftHist = leftStats.histogram; - - for (int i = 0; i < leftHist.buckets.size(); i++) { - Bucket bucket = leftHist.buckets.get(i); - if (bucket.upper >= numVal && bucket.lower <= numVal) { - double overlapPercentInBucket; - if (numVal == bucket.upper && numVal == bucket.lower) { - if (contains) { - overlapPercentInBucket = 1; - } else { - overlapPercentInBucket = 0; - } - } else { - overlapPercentInBucket = StatsMathUtil.minNonNaN(1, (numVal - bucket.lower) - / (bucket.upper - bucket.lower)); - } - double overlapCountInBucket = overlapPercentInBucket * bucket.count; - double sel = StatsMathUtil.minNonNaN(1, (bucket.preSum + overlapCountInBucket) - / StatsMathUtil.nonZeroDivisor(context.statistics.getRowCount())); - List updatedBucketList = leftHist.buckets.subList(0, i + 1); - updatedBucketList.add(new Bucket(bucket.lower, numVal, overlapCountInBucket, - bucket.preSum, overlapPercentInBucket * bucket.ndv)); - ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats) - .setMaxValue(numVal) - .setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build()) - .build(); - context.addKeyIfSlot(leftExpr); - return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic); - } - } - return context.statistics.withSel(0); - } - - private Statistics estimateGreaterThanLiteralWithHistogram(Expression leftExpr, ColumnStatistic leftStats, - double 
numVal, EstimationContext context, boolean contains) { - Histogram leftHist = leftStats.histogram; - - for (int i = 0; i < leftHist.buckets.size(); i++) { - Bucket bucket = leftHist.buckets.get(i); - if (bucket.upper >= numVal && bucket.lower <= numVal) { - double overlapPercentInBucket; - if (numVal == bucket.upper && numVal == bucket.lower) { - if (contains) { - overlapPercentInBucket = 1; - } else { - overlapPercentInBucket = 0; - } - } else { - overlapPercentInBucket = StatsMathUtil.minNonNaN(1, ((bucket.upper - numVal) - / (bucket.upper - bucket.lower))); - } - double overlapCountInBucket = overlapPercentInBucket * bucket.count; - double sel = StatsMathUtil.minNonNaN(1, - (leftHist.size() - bucket.preSum - (bucket.count - overlapCountInBucket)) - / context.statistics.getRowCount()); - List updatedBucketList = new ArrayList<>(); - updatedBucketList.add(new Bucket(numVal, bucket.upper, overlapPercentInBucket * bucket.count, - 0, overlapPercentInBucket * bucket.ndv)); - updatedBucketList.addAll(leftHist.buckets.subList(i, leftHist.buckets.size())); - ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats) - .setMaxValue(numVal) - .setHistogram(new HistogramBuilder(leftHist).setBuckets(updatedBucketList).build()) - .build(); - context.addKeyIfSlot(leftExpr); - return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic); - } - } - return context.statistics.withSel(0); - } - - private Statistics estimateEqualToWithHistogram(Expression leftExpr, ColumnStatistic leftStats, - double numVal, EstimationContext context) { - Histogram histogram = leftStats.histogram; - - double sel = 0; - for (int i = 0; i < histogram.buckets.size(); i++) { - Bucket bucket = histogram.buckets.get(i); - if (bucket.upper >= numVal && bucket.lower <= numVal) { - sel = (bucket.count / bucket.ndv) / histogram.size(); - } - } - if (sel == 0) { - return Statistics.zero(context.statistics); - } - ColumnStatistic columnStatistic = new ColumnStatisticBuilder(leftStats) - .setHistogram(null) - .setNdv(1) - .setNumNulls(0) - .setMaxValue(numVal) - .setMinValue(numVal) - .build(); - context.addKeyIfSlot(leftExpr); - return context.statistics.withSel(sel).addColumnStats(leftExpr, columnStatistic); - } - @Override public Statistics visitLike(Like like, EstimationContext context) { StatisticsBuilder statsBuilder = new StatisticsBuilder(context.statistics); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 4f626948407077..ba7d551a4ef682 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -599,17 +599,6 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName) { } } - private Histogram getColumnHistogram(TableIf table, String colName) { - // if (totalHistogramMap.get(table.getName() + colName) != null) { - // return totalHistogramMap.get(table.getName() + colName); - // } else if (isPlayNereidsDump) { - // return null; - // } else { - // return Env.getCurrentEnv().getStatisticsCache().getHistogram(table.getId(), colName); - // } - return null; - } - // TODO: 1. Subtract the pruned partition // 2. Consider the influence of runtime filter // 3. Get NDV and column data size from StatisticManger, StatisticManager doesn't support it now. 
@@ -641,17 +630,6 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } if (!cache.isUnKnown) { rowCount = Math.max(rowCount, cache.count); - Histogram histogram = getColumnHistogram(table, colName); - if (histogram != null) { - ColumnStatisticBuilder columnStatisticBuilder = - new ColumnStatisticBuilder(cache).setHistogram(histogram); - cache = columnStatisticBuilder.build(); - if (ConnectContext.get().getSessionVariable().isEnableMinidump() - && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { - totalColumnStatisticMap.put(table.getName() + ":" + colName, cache); - totalHistogramMap.put(table.getName() + colName, histogram); - } - } } columnStatisticMap.put(slotReference, cache); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 5ea1f9097bb052..5637455b1720c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -19,7 +19,6 @@ import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.datasource.InternalCatalog; @@ -89,21 +88,12 @@ public class ColumnStatistic { public final LiteralExpr minExpr; public final LiteralExpr maxExpr; - @SerializedName("histogram") - // assign value when do stats estimation. - public final Histogram histogram; - - @SerializedName("partitionIdToColStats") - public final Map partitionIdToColStats = new HashMap<>(); - public final String updatedTime; - public final PartitionInfo partitionInfo; - public ColumnStatistic(double count, double ndv, ColumnStatistic original, double avgSizeByte, double numNulls, double dataSize, double minValue, double maxValue, - LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown, Histogram histogram, - String updatedTime, PartitionInfo partitionInfo) { + LiteralExpr minExpr, LiteralExpr maxExpr, boolean isUnKnown, + String updatedTime) { this.count = count; this.ndv = ndv; this.original = original; @@ -115,9 +105,7 @@ public ColumnStatistic(double count, double ndv, ColumnStatistic original, doubl this.minExpr = minExpr; this.maxExpr = maxExpr; this.isUnKnown = isUnKnown; - this.histogram = histogram; this.updatedTime = updatedTime; - this.partitionInfo = partitionInfo; } public static ColumnStatistic fromResultRow(List resultRows) { @@ -139,7 +127,6 @@ public static ColumnStatistic fromResultRow(List resultRows) { if (columnStatistic == null) { return ColumnStatistic.UNKNOWN; } - columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats); return columnStatistic; } @@ -242,7 +229,7 @@ public boolean hasIntersect(ColumnStatistic other) { public ColumnStatistic updateBySelectivity(double selectivity, double rowCount) { if (isUnKnown) { - return UNKNOWN; + return this; } ColumnStatisticBuilder builder = new ColumnStatisticBuilder(this); Double rowsAfterFilter = rowCount * selectivity; @@ -324,7 +311,6 @@ public JSONObject toJson() { statistic.put("MinExpr", minExpr); statistic.put("MaxExpr", maxExpr); statistic.put("IsUnKnown", isUnKnown); - statistic.put("Histogram", Histogram.serializeToJson(histogram)); statistic.put("Original", original); statistic.put("LastUpdatedTime", updatedTime); return statistic; @@ -374,8 +360,7 @@ public static ColumnStatistic 
fromJson(String statJson) { null, null, stat.getBoolean("IsUnKnown"), - Histogram.deserializeFromJson(stat.getString("Histogram")), - stat.getString("LastUpdatedTime"), null + stat.getString("LastUpdatedTime") ); } @@ -383,10 +368,6 @@ public boolean minOrMaxIsInf() { return Double.isInfinite(maxValue) || Double.isInfinite(minValue); } - public boolean hasHistogram() { - return histogram != null && histogram != Histogram.UNKNOWN; - } - public double getOriginalNdv() { if (original != null) { return original.ndv; @@ -394,16 +375,7 @@ public double getOriginalNdv() { return ndv; } - // TODO expanded this function to support more cases, help to compute the change of ndv density - public boolean rangeChanged() { - return original != null && (minValue != original.minValue || maxValue != original.maxValue); - } - public boolean isUnKnown() { return isUnKnown; } - - public void putPartStats(String partId, ColumnStatistic columnStatistic) { - this.partitionIdToColStats.put(partId, columnStatistic); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java index f97459555c87ce..f8ed6a1b6ab0d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatisticBuilder.java @@ -18,10 +18,6 @@ package org.apache.doris.statistics; import org.apache.doris.analysis.LiteralExpr; -import org.apache.doris.catalog.PartitionInfo; - -import java.util.HashMap; -import java.util.Map; public class ColumnStatisticBuilder { private double count; @@ -36,28 +32,13 @@ public class ColumnStatisticBuilder { private boolean isUnknown; - private Histogram histogram; - private ColumnStatistic original; - private Map partitionIdToColStats = new HashMap<>(); - private String updatedTime; - private PartitionInfo partitionInfo; - public ColumnStatisticBuilder() { } - public PartitionInfo getPartitionInfo() { - return partitionInfo; - } - - public ColumnStatisticBuilder setPartitionInfo(PartitionInfo partitionInfo) { - this.partitionInfo = partitionInfo; - return this; - } - public ColumnStatisticBuilder(ColumnStatistic columnStatistic) { this.count = columnStatistic.count; this.ndv = columnStatistic.ndv; @@ -69,11 +50,8 @@ public ColumnStatisticBuilder(ColumnStatistic columnStatistic) { this.minExpr = columnStatistic.minExpr; this.maxExpr = columnStatistic.maxExpr; this.isUnknown = columnStatistic.isUnKnown; - this.histogram = columnStatistic.histogram; this.original = columnStatistic.original; - this.partitionIdToColStats.putAll(columnStatistic.partitionIdToColStats); this.updatedTime = columnStatistic.updatedTime; - this.partitionInfo = columnStatistic.partitionInfo; } public ColumnStatisticBuilder setCount(double count) { @@ -171,15 +149,6 @@ public boolean isUnknown() { return isUnknown; } - public Histogram getHistogram() { - return histogram; - } - - public ColumnStatisticBuilder setHistogram(Histogram histogram) { - this.histogram = histogram; - return this; - } - public String getUpdatedTime() { return updatedTime; } @@ -194,13 +163,11 @@ public ColumnStatistic build() { if (original == null && !isUnknown) { original = new ColumnStatistic(count, ndv, null, avgSizeByte, numNulls, dataSize, minValue, maxValue, minExpr, maxExpr, - isUnknown, histogram, updatedTime, partitionInfo); - original.partitionIdToColStats.putAll(partitionIdToColStats); + isUnknown, updatedTime); } ColumnStatistic colStats = 
new ColumnStatistic(count, ndv, original, avgSizeByte, numNulls, dataSize, minValue, maxValue, minExpr, maxExpr, - isUnknown, histogram, updatedTime, partitionInfo); - colStats.partitionIdToColStats.putAll(partitionIdToColStats); + isUnknown, updatedTime); return colStats; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java index c9b049a8cfc083..84110d5bda12dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsCache.java @@ -39,11 +39,9 @@ import java.time.Duration; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -205,11 +203,6 @@ private void doPreHeat() { LOG.warn("Error when preheating stats cache", t); } } - try { - loadPartStats(keyToColStats); - } catch (Exception e) { - LOG.warn("Fucka", e); - } } /** @@ -262,40 +255,4 @@ public void putCache(StatisticsCacheKey k, ColumnStatistic c) { f.obtrudeValue(Optional.of(c)); columnStatisticsCache.put(k, f); } - - private void loadPartStats(Map keyToColStats) { - final int batchSize = Config.expr_children_limit; - Set keySet = new HashSet<>(); - for (StatisticsCacheKey statisticsCacheKey : keyToColStats.keySet()) { - if (keySet.size() < batchSize - 1) { - keySet.add(statisticsCacheKey); - } else { - List partStats = StatisticsRepository.loadPartStats(keySet); - addPartStatsToColStats(keyToColStats, partStats); - keySet = new HashSet<>(); - } - } - if (!keySet.isEmpty()) { - List partStats = StatisticsRepository.loadPartStats(keySet); - addPartStatsToColStats(keyToColStats, partStats); - } - } - - private void addPartStatsToColStats(Map keyToColStats, - List partsStats) { - for (ResultRow r : partsStats) { - try { - StatsId statsId = new StatsId(r); - long tblId = statsId.tblId; - long idxId = statsId.idxId; - String partId = statsId.partId; - String colId = statsId.colId; - ColumnStatistic partStats = ColumnStatistic.fromResultRow(r); - keyToColStats.get(new StatisticsCacheKey(tblId, idxId, colId)).putPartStats(partId, partStats); - } catch (Throwable t) { - LOG.warn("Failed to deserialized part stats", t); - } - } - } - } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java index e33c28ae933950..792d03697ae9d4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/HyperGraphBuilder.java @@ -341,8 +341,8 @@ private Statistics injectRowcount(LogicalOlapScan scanPlan) { for (Slot slot : scanPlan.getOutput()) { slotIdToColumnStats.put(slot, new ColumnStatistic(count, count, null, 1, 0, 0, 0, - count, null, null, true, null, - new Date().toString(), null)); + count, null, null, true, + new Date().toString())); } return new Statistics(count, slotIdToColumnStats); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java index cde8d20177f490..587a5b859a160c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/statistics/CacheTest.java @@ -266,7 +266,7 @@ public HMSExternalTable getTableNullable(long tableId) { table.getColumnStatistic("col"); result = new ColumnStatistic(1, 2, null, 3, 4, 5, 6, 7, - null, null, false, null, new Date().toString(), null); + null, null, false, new Date().toString()); } }; try { diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java index a1ff5b13587522..c3f04bccfc8b28 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatsDeriveResultTest.java @@ -29,8 +29,8 @@ public class StatsDeriveResultTest { public void testUpdateRowCountByLimit() { StatsDeriveResult stats = new StatsDeriveResult(100); ColumnStatistic a = new ColumnStatistic(100, 10, null, 1, 5, 10, - 1, 100, null, null, false, null, - new Date().toString(), null); + 1, 100, null, null, false, + new Date().toString()); Id id = new Id(1); stats.addColumnStats(id, a); StatsDeriveResult res = stats.updateByLimit(0); From 8a385c863d13ebc508593764994198a4bf93b3d3 Mon Sep 17 00:00:00 2001 From: minghong Date: Sat, 2 Dec 2023 19:11:14 +0800 Subject: [PATCH 47/50] [pick](Nereids) temporary partition is selected only if user manually specified: Branch-2.0 #27893 (#27905) --- .../expression/rules/PartitionPruner.java | 10 ---------- .../rules/rewrite/PruneOlapScanPartition.java | 18 ++++++++++++------ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index a033629547619d..ae3a5add083486 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.rules.expression.rules; import org.apache.doris.catalog.ListPartitionItem; -import org.apache.doris.catalog.PartitionInfo; import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.RangePartitionItem; import org.apache.doris.nereids.CascadesContext; @@ -98,15 +97,6 @@ public List prune() { .collect(ImmutableList.toImmutableList()); } - /** - * prune partition with `partitionInfo` as parameter. - */ - public static List prune(List partitionSlots, Expression partitionPredicate, - PartitionInfo partitionInfo, CascadesContext cascadesContext, PartitionTableType partitionTableType) { - return prune(partitionSlots, partitionPredicate, partitionInfo.getAllPartitions(), cascadesContext, - partitionTableType); - } - /** * prune partition with `idToPartitions` as parameter. 
*/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java index 1bdff7f6c10eba..655b8c7976b78e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PruneOlapScanPartition.java @@ -19,6 +19,7 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PartitionItem; import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.rules.expression.rules.PartitionPruner; @@ -29,7 +30,6 @@ import org.apache.doris.nereids.util.Utils; import com.google.common.collect.ImmutableList; -import org.apache.commons.collections.CollectionUtils; import java.util.ArrayList; import java.util.List; @@ -64,15 +64,21 @@ public Rule build() { .stream() .map(column -> scanOutput.get(column.getName().toLowerCase())) .collect(Collectors.toList()); + List manuallySpecifiedPartitions = scan.getManuallySpecifiedPartitions(); + Map idToPartitions; + if (manuallySpecifiedPartitions.isEmpty()) { + idToPartitions = partitionInfo.getIdToItem(false); + } else { + Map allPartitions = partitionInfo.getAllPartitions(); + idToPartitions = allPartitions.keySet().stream() + .filter(id -> manuallySpecifiedPartitions.contains(id)) + .collect(Collectors.toMap(Function.identity(), id -> allPartitions.get(id))); + } List prunedPartitions = new ArrayList<>(PartitionPruner.prune( - partitionSlots, filter.getPredicate(), partitionInfo, ctx.cascadesContext, + partitionSlots, filter.getPredicate(), idToPartitions, ctx.cascadesContext, PartitionTableType.OLAP)); - List manuallySpecifiedPartitions = scan.getManuallySpecifiedPartitions(); - if (!CollectionUtils.isEmpty(manuallySpecifiedPartitions)) { - prunedPartitions.retainAll(manuallySpecifiedPartitions); - } LogicalOlapScan rewrittenScan = scan.withSelectedPartitionIds(ImmutableList.copyOf(prunedPartitions)); return new LogicalFilter<>(filter.getConjuncts(), rewrittenScan); }).toRule(RuleType.OLAP_SCAN_PARTITION_PRUNE); From 27731da69a793d80e74343d325d61eea0d0a0d05 Mon Sep 17 00:00:00 2001 From: slothever <18522955+wsjz@users.noreply.github.com> Date: Sun, 3 Dec 2023 10:18:54 +0800 Subject: [PATCH 48/50] [fix](multi-catalog)support the max compute partition prune (#27154) (#27902) backport #27154 --- be/src/runtime/descriptors.cpp | 1 - be/src/runtime/descriptors.h | 2 - .../format/table/max_compute_jni_reader.cpp | 9 +- .../format/table/max_compute_jni_reader.h | 12 +- be/src/vec/exec/scan/vfile_scanner.cpp | 12 +- .../maxcompute/MaxComputeJniScanner.java | 32 +++- .../doris/analysis/ShowPartitionsStmt.java | 14 +- .../external/MaxComputeExternalTable.java | 177 ++++++++++-------- .../datasource/ExternalMetaCacheMgr.java | 10 + .../doris/datasource/MaxComputeCacheKey.java | 65 +++++++ .../datasource/MaxComputeExternalCatalog.java | 61 ++++-- .../datasource/MaxComputeMetadataCache.java | 90 +++++++++ .../MaxComputeMetadataCacheMgr.java | 64 +++++++ .../hive/PooledHiveMetaStoreClient.java | 9 +- .../planner/external/FileQueryScanNode.java | 2 + .../planner/external/MaxComputeScanNode.java | 121 +++++++++--- .../planner/external/MaxComputeSplit.java | 40 ++++ .../planner/external/TableFormatType.java | 1 + .../external/TablePartitionValues.java | 9 +- .../hudi/HudiCachedPartitionProcessor.java | 8 +- 
.../org/apache/doris/qe/ShowExecutor.java | 37 +++- gensrc/thrift/Descriptors.thrift | 1 - gensrc/thrift/PlanNodes.thrift | 4 + .../test_external_catalog_maxcompute.out | 46 ++++- .../test_external_catalog_maxcompute.groovy | 17 +- 25 files changed, 687 insertions(+), 157 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeCacheKey.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCache.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCacheMgr.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeSplit.java diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 15a4b773264ab4..f8125588ae5602 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -192,7 +192,6 @@ MaxComputeTableDescriptor::MaxComputeTableDescriptor(const TTableDescriptor& tde _table(tdesc.mcTable.table), _access_key(tdesc.mcTable.access_key), _secret_key(tdesc.mcTable.secret_key), - _partition_spec(tdesc.mcTable.partition_spec), _public_access(tdesc.mcTable.public_access) {} MaxComputeTableDescriptor::~MaxComputeTableDescriptor() = default; diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index 9f9e4fb44fa051..94f9e084894957 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -238,7 +238,6 @@ class MaxComputeTableDescriptor : public TableDescriptor { const std::string table() const { return _table; } const std::string access_key() const { return _access_key; } const std::string secret_key() const { return _secret_key; } - const std::string partition_spec() const { return _partition_spec; } const std::string public_access() const { return _public_access; } private: @@ -247,7 +246,6 @@ class MaxComputeTableDescriptor : public TableDescriptor { std::string _table; std::string _access_key; std::string _secret_key; - std::string _partition_spec; std::string _public_access; }; diff --git a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp index 34db6a1df4d484..7ba714eedd5deb 100644 --- a/be/src/vec/exec/format/table/max_compute_jni_reader.cpp +++ b/be/src/vec/exec/format/table/max_compute_jni_reader.cpp @@ -38,10 +38,15 @@ class Block; namespace doris::vectorized { MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_desc, + const TMaxComputeFileDesc& max_compute_params, const std::vector& file_slot_descs, const TFileRangeDesc& range, RuntimeState* state, RuntimeProfile* profile) - : _file_slot_descs(file_slot_descs), _range(range), _state(state), _profile(profile) { + : _max_compute_params(max_compute_params), + _file_slot_descs(file_slot_descs), + _range(range), + _state(state), + _profile(profile) { _table_desc = mc_desc; std::ostringstream required_fields; std::ostringstream columns_types; @@ -64,7 +69,7 @@ MaxComputeJniReader::MaxComputeJniReader(const MaxComputeTableDescriptor* mc_des {"access_key", _table_desc->access_key()}, {"secret_key", _table_desc->secret_key()}, {"project", _table_desc->project()}, - {"partition_spec", _table_desc->partition_spec()}, + {"partition_spec", _max_compute_params.partition_spec}, {"table", _table_desc->table()}, {"public_access", _table_desc->public_access()}, {"start_offset", std::to_string(_range.start_offset)}, diff --git a/be/src/vec/exec/format/table/max_compute_jni_reader.h 
b/be/src/vec/exec/format/table/max_compute_jni_reader.h index 0b3c809c50243f..e027678148fd0d 100644 --- a/be/src/vec/exec/format/table/max_compute_jni_reader.h +++ b/be/src/vec/exec/format/table/max_compute_jni_reader.h @@ -54,6 +54,7 @@ class MaxComputeJniReader : public GenericReader { public: MaxComputeJniReader(const MaxComputeTableDescriptor* mc_desc, + const TMaxComputeFileDesc& max_compute_params, const std::vector& file_slot_descs, const TFileRangeDesc& range, RuntimeState* state, RuntimeProfile* profile); @@ -68,13 +69,14 @@ class MaxComputeJniReader : public GenericReader { std::unordered_map* colname_to_value_range); private: - const MaxComputeTableDescriptor* _table_desc; + const MaxComputeTableDescriptor* _table_desc = nullptr; + const TMaxComputeFileDesc& _max_compute_params; const std::vector& _file_slot_descs; const TFileRangeDesc& _range; - RuntimeState* _state; - RuntimeProfile* _profile; - std::unordered_map* _colname_to_value_range; - std::unique_ptr _jni_connector; + RuntimeState* _state = nullptr; + RuntimeProfile* _profile = nullptr; + std::unordered_map* _colname_to_value_range = nullptr; + std::unique_ptr _jni_connector = nullptr; }; } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index d0f26835c69b63..ede6767b7afa6a 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -683,13 +683,13 @@ Status VFileScanner::_get_next_reader() { bool need_to_get_parsed_schema = false; switch (format_type) { case TFileFormatType::FORMAT_JNI: { - if (_real_tuple_desc->table_desc()->table_type() == - ::doris::TTableType::type::MAX_COMPUTE_TABLE) { - const MaxComputeTableDescriptor* mc_desc = - static_cast( - _real_tuple_desc->table_desc()); + if (range.__isset.table_format_params && + range.table_format_params.table_format_type == "max_compute") { + const auto* mc_desc = static_cast( + _real_tuple_desc->table_desc()); std::unique_ptr mc_reader = MaxComputeJniReader::create_unique( - mc_desc, _file_slot_descs, range, _state, _profile); + mc_desc, range.table_format_params.max_compute_params, _file_slot_descs, + range, _state, _profile); init_status = mc_reader->init_reader(_colname_to_value_range); _cur_reader = std::move(mc_reader); } else if (range.__isset.table_format_params && diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java index 0d80546cdfb820..f4a8a9c8fc6cd5 100644 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java +++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeJniScanner.java @@ -61,11 +61,11 @@ public class MaxComputeJniScanner extends JniScanner { private static final String START_OFFSET = "start_offset"; private static final String SPLIT_SIZE = "split_size"; private static final String PUBLIC_ACCESS = "public_access"; - private final RootAllocator arrowAllocator = new RootAllocator(Integer.MAX_VALUE); private final Map tableScans = new ConcurrentHashMap<>(); private final String region; private final String project; private final String table; + private RootAllocator arrowAllocator; private PartitionSpec partitionSpec; private Set partitionColumns; private MaxComputeTableScan curTableScan; @@ -171,9 +171,14 @@ public void open() throws IOException { 
partitionColumns = session.getSchema().getPartitionColumns().stream() .map(Column::getName) .collect(Collectors.toSet()); - List maxComputeColumns = new ArrayList<>(readColumns); - maxComputeColumns.removeIf(e -> partitionColumns.contains(e.getName())); - curReader = session.openArrowRecordReader(start, totalRows, maxComputeColumns, arrowAllocator); + List pushDownColumns = new ArrayList<>(readColumns); + pushDownColumns.removeIf(e -> partitionColumns.contains(e.getName())); + if (pushDownColumns.isEmpty() && !partitionColumns.isEmpty()) { + // query columns required non-null, when query partition table + pushDownColumns.add(session.getSchema().getColumn(0)); + } + arrowAllocator = new RootAllocator(Integer.MAX_VALUE); + curReader = session.openArrowRecordReader(start, totalRows, pushDownColumns, arrowAllocator); remainBatchRows = totalRows; } catch (TunnelException e) { if (retryCount > 0 && e.getErrorMsg().contains("TableModified")) { @@ -254,7 +259,8 @@ public void close() throws IOException { startOffset = -1; splitSize = -1; if (curReader != null) { - arrowAllocator.releaseBytes(arrowAllocator.getAllocatedMemory()); + arrowAllocator.close(); + arrowAllocator = null; curReader.close(); curReader = null; } @@ -279,15 +285,25 @@ protected int getNext() throws IOException { private int readVectors(int expectedRows) throws IOException { VectorSchemaRoot batch; int curReadRows = 0; - while (curReadRows < expectedRows && (batch = curReader.read()) != null) { + while (curReadRows < expectedRows) { + batch = curReader.read(); + if (batch == null) { + break; + } try { List fieldVectors = batch.getFieldVectors(); int batchRows = 0; for (FieldVector column : fieldVectors) { + Integer readColumnId = readColumnsToId.get(column.getName()); + if (readColumnId == null) { + // use for partition if no column need to read. 
+ batchRows = column.getValueCount(); + continue; + } columnValue.reset(column); batchRows = column.getValueCount(); for (int j = 0; j < batchRows; j++) { - appendData(readColumnsToId.get(column.getName()), columnValue); + appendData(readColumnId, columnValue); } } if (partitionSpec != null) { @@ -303,6 +319,8 @@ private int readVectors(int expectedRows) throws IOException { } } curReadRows += batchRows; + } catch (Exception e) { + throw new RuntimeException("Fail to read arrow data, reason: " + e.getMessage(), e); } finally { batch.close(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowPartitionsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowPartitionsStmt.java index f6e9b06e0b1eea..dc1d360f290ad7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowPartitionsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ShowPartitionsStmt.java @@ -26,6 +26,7 @@ import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.external.HMSExternalTable; +import org.apache.doris.catalog.external.MaxComputeExternalTable; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ErrorCode; import org.apache.doris.common.ErrorReport; @@ -37,6 +38,7 @@ import org.apache.doris.common.util.OrderByPair; import org.apache.doris.datasource.CatalogIf; import org.apache.doris.datasource.HMSExternalCatalog; +import org.apache.doris.datasource.MaxComputeExternalCatalog; import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.ShowResultSetMetaData; @@ -126,7 +128,7 @@ public void analyze(Analyzer analyzer) throws UserException { DatabaseIf db = catalog.getDbOrAnalysisException(dbName); TableIf table = db.getTableOrMetaException(tblName, Table.TableType.OLAP, TableType.MATERIALIZED_VIEW, - TableType.HMS_EXTERNAL_TABLE); + TableType.HMS_EXTERNAL_TABLE, TableType.MAX_COMPUTE_EXTERNAL_TABLE); if (table instanceof HMSExternalTable) { if (((HMSExternalTable) table).isView()) { @@ -138,6 +140,13 @@ public void analyze(Analyzer analyzer) throws UserException { return; } + if (table instanceof MaxComputeExternalTable) { + if (((MaxComputeExternalTable) table).getOdpsTable().getPartitions().isEmpty()) { + throw new AnalysisException("Table " + tblName + " is not a partitioned table"); + } + return; + } + table.readLock(); try { // build proc path @@ -170,7 +179,8 @@ public void analyzeImpl(Analyzer analyzer) throws UserException { } // disallow unsupported catalog - if (!(catalog.isInternalCatalog() || catalog instanceof HMSExternalCatalog)) { + if (!(catalog.isInternalCatalog() || catalog instanceof HMSExternalCatalog + || catalog instanceof MaxComputeExternalCatalog)) { throw new AnalysisException(String.format("Catalog of type '%s' is not allowed in ShowPartitionsStmt", catalog.getType())); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/MaxComputeExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/MaxComputeExternalTable.java index 3c2f3bada03574..5c25cf6cce0e04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/external/MaxComputeExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/external/MaxComputeExternalTable.java @@ -17,25 +17,25 @@ package org.apache.doris.catalog.external; -import org.apache.doris.analysis.BinaryPredicate; -import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.InPredicate; 
-import org.apache.doris.analysis.Predicate; -import org.apache.doris.analysis.SlotRef; import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; +import org.apache.doris.datasource.MaxComputeCacheKey; import org.apache.doris.datasource.MaxComputeExternalCatalog; +import org.apache.doris.datasource.MaxComputeMetadataCache; +import org.apache.doris.planner.external.TablePartitionValues; import org.apache.doris.thrift.TMCTable; import org.apache.doris.thrift.TTableDescriptor; import org.apache.doris.thrift.TTableType; import com.aliyun.odps.OdpsType; import com.aliyun.odps.Table; +import com.aliyun.odps.tunnel.TunnelException; import com.aliyun.odps.type.ArrayTypeInfo; import com.aliyun.odps.type.CharTypeInfo; import com.aliyun.odps.type.DecimalTypeInfo; @@ -43,17 +43,15 @@ import com.aliyun.odps.type.StructTypeInfo; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.VarcharTypeInfo; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; -import java.util.StringJoiner; +import java.util.stream.Collectors; /** * MaxCompute external table. @@ -61,8 +59,9 @@ public class MaxComputeExternalTable extends ExternalTable { private Table odpsTable; - private Set partitionKeys; - private String partitionSpec; + private List partitionSpecs; + private Map partitionNameToColumns; + private List partitionTypes; public MaxComputeExternalTable(long id, String name, String dbName, MaxComputeExternalCatalog catalog) { super(id, name, catalog, dbName, TableType.MAX_COMPUTE_EXTERNAL_TABLE); @@ -73,12 +72,80 @@ protected synchronized void makeSureInitialized() { super.makeSureInitialized(); if (!objectCreated) { odpsTable = ((MaxComputeExternalCatalog) catalog).getClient().tables().get(name); + initTablePartitions(); objectCreated = true; } } + public long getTotalRows() throws TunnelException { + // use for non-partitioned table + // partition table will read the entire partition on FE so get total rows is unnecessary. 
+ makeSureInitialized(); + MaxComputeMetadataCache metadataCache = Env.getCurrentEnv().getExtMetaCacheMgr() + .getMaxComputeMetadataCache(catalog.getId()); + MaxComputeExternalCatalog mcCatalog = ((MaxComputeExternalCatalog) catalog); + return metadataCache.getCachedRowCount(dbName, name, null, () -> mcCatalog.getTableTunnel() + .getDownloadSession(dbName, name, null) + .getRecordCount()); + } + + @Override + public Set getPartitionNames() { + makeSureInitialized(); + return partitionNameToColumns.keySet(); + } + + public List getPartitionColumns() { + makeSureInitialized(); + return new ArrayList<>(partitionNameToColumns.values()); + } + + public TablePartitionValues getPartitionValues() { + makeSureInitialized(); + // Make sure to call it after initSchema() completes + String projectName = odpsTable.getProject(); + String tableName = odpsTable.getName(); + MaxComputeMetadataCache metadataCache = Env.getCurrentEnv().getExtMetaCacheMgr() + .getMaxComputeMetadataCache(catalog.getId()); + return metadataCache.getCachedPartitionValues( + new MaxComputeCacheKey(projectName, tableName), + () -> { + TablePartitionValues partitionValues = new TablePartitionValues(); + partitionValues.addPartitions(partitionSpecs, + partitionSpecs.stream() + .map(p -> parsePartitionValues(new ArrayList<>(getPartitionNames()), p)) + .collect(Collectors.toList()), + partitionTypes); + return partitionValues; + }); + } + + /** + * parse all values from partitionPath to a single list. + * @param partitionColumns partitionColumns can contain the part1,part2,part3... + * @param partitionPath partitionPath format is like the 'part1=123/part2=abc/part3=1bc' + * @return all values of partitionPath + */ + private static List parsePartitionValues(List partitionColumns, String partitionPath) { + String[] partitionFragments = partitionPath.split("/"); + if (partitionFragments.length != partitionColumns.size()) { + throw new RuntimeException("Failed to parse partition values of path: " + partitionPath); + } + List partitionValues = new ArrayList<>(partitionFragments.length); + for (int i = 0; i < partitionFragments.length; i++) { + String prefix = partitionColumns.get(i) + "="; + if (partitionFragments[i].startsWith(prefix)) { + partitionValues.add(partitionFragments[i].substring(prefix.length())); + } else { + partitionValues.add(partitionFragments[i]); + } + } + return partitionValues; + } + @Override public List initSchema() { + // this method will be called at semantic parsing. makeSureInitialized(); List columns = odpsTable.getSchema().getColumns(); List result = Lists.newArrayListWithCapacity(columns.size()); @@ -86,72 +153,31 @@ public List initSchema() { result.add(new Column(field.getName(), mcTypeToDorisType(field.getTypeInfo()), true, null, true, field.getComment(), true, -1)); } - List partitionColumns = odpsTable.getSchema().getPartitionColumns(); - partitionKeys = new HashSet<>(); - for (com.aliyun.odps.Column partColumn : partitionColumns) { - result.add(new Column(partColumn.getName(), mcTypeToDorisType(partColumn.getTypeInfo()), true, null, - true, partColumn.getComment(), true, -1)); - partitionKeys.add(partColumn.getName()); - } + result.addAll(partitionNameToColumns.values()); return result; } - public Optional getPartitionSpec(List conjuncts) { - if (!partitionKeys.isEmpty()) { - if (conjuncts.isEmpty()) { - throw new IllegalArgumentException("Max Compute partition table need partition predicate."); - } - // recreate partitionSpec when conjuncts is changed. 
- List partitionConjuncts = parsePartitionConjuncts(conjuncts, partitionKeys); - StringJoiner partitionSpec = new StringJoiner(","); - partitionConjuncts.forEach(partitionSpec::add); - this.partitionSpec = partitionSpec.toString(); - return Optional.of(this.partitionSpec); - } - return Optional.empty(); - } - - private static List parsePartitionConjuncts(List conjuncts, Set partitionKeys) { - List partitionConjuncts = new ArrayList<>(); - Set predicates = Sets.newHashSet(); - for (Expr conjunct : conjuncts) { - // collect depart predicate - conjunct.collect(BinaryPredicate.class, predicates); - conjunct.collect(InPredicate.class, predicates); - } - Map slotToConjuncts = new HashMap<>(); - for (Predicate predicate : predicates) { - List slotRefs = new ArrayList<>(); - if (predicate instanceof BinaryPredicate) { - if (((BinaryPredicate) predicate).getOp() != BinaryPredicate.Operator.EQ) { - // max compute only support the EQ operator: pt='pt-value' - continue; - } - // BinaryPredicate has one left slotRef, and partition value not slotRef - predicate.collect(SlotRef.class, slotRefs); - slotToConjuncts.put(slotRefs.get(0).getColumnName(), predicate); - } else if (predicate instanceof InPredicate) { - predicate.collect(SlotRef.class, slotRefs); - slotToConjuncts.put(slotRefs.get(0).getColumnName(), predicate); - } + private void initTablePartitions() { + List partitionColumns = odpsTable.getSchema().getPartitionColumns(); + if (!partitionColumns.isEmpty()) { + partitionSpecs = odpsTable.getPartitions().stream() + .map(e -> e.getPartitionSpec().toString(false, true)) + .collect(Collectors.toList()); + } else { + partitionSpecs = ImmutableList.of(); } - for (String partitionKey : partitionKeys) { - Predicate partitionPredicate = slotToConjuncts.get(partitionKey); - if (partitionPredicate == null) { - continue; - } - if (partitionPredicate instanceof InPredicate) { - List inList = ((InPredicate) partitionPredicate).getListChildren(); - for (Expr expr : inList) { - String partitionConjunct = partitionKey + "=" + expr.toSql(); - partitionConjuncts.add(partitionConjunct.replace("`", "")); - } - } else { - String partitionConjunct = partitionPredicate.toSql(); - partitionConjuncts.add(partitionConjunct.replace("`", "")); - } + // sort partition columns to align partitionTypes and partitionName. 
+ partitionNameToColumns = new LinkedHashMap<>(); + for (com.aliyun.odps.Column partColumn : partitionColumns) { + Column dorisCol = new Column(partColumn.getName(), + mcTypeToDorisType(partColumn.getTypeInfo()), true, null, + true, partColumn.getComment(), true, -1); + partitionNameToColumns.put(dorisCol.getName(), dorisCol); } - return partitionConjuncts; + partitionTypes = partitionNameToColumns.values() + .stream() + .map(Column::getType) + .collect(Collectors.toList()); } private Type mcTypeToDorisType(TypeInfo typeInfo) { @@ -241,11 +267,10 @@ private Type mcTypeToDorisType(TypeInfo typeInfo) { public TTableDescriptor toThrift() { List schema = getFullSchema(); TMCTable tMcTable = new TMCTable(); - MaxComputeExternalCatalog mcCatalog = (MaxComputeExternalCatalog) catalog; + MaxComputeExternalCatalog mcCatalog = ((MaxComputeExternalCatalog) catalog); tMcTable.setRegion(mcCatalog.getRegion()); tMcTable.setAccessKey(mcCatalog.getAccessKey()); tMcTable.setSecretKey(mcCatalog.getSecretKey()); - tMcTable.setPartitionSpec(this.partitionSpec); tMcTable.setPublicAccess(String.valueOf(mcCatalog.enablePublicAccess())); // use mc project as dbName tMcTable.setProject(dbName); @@ -257,6 +282,7 @@ public TTableDescriptor toThrift() { } public Table getOdpsTable() { + makeSureInitialized(); return odpsTable; } @@ -264,6 +290,5 @@ public Table getOdpsTable() { public String getMysqlType() { return "BASE TABLE"; } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java index 03a46c625e892c..ef62f498695a66 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalMetaCacheMgr.java @@ -54,6 +54,7 @@ public class ExternalMetaCacheMgr { // all catalogs could share the same fsCache. 
private FileSystemCache fsCache; private final IcebergMetadataCacheMgr icebergMetadataCacheMgr; + private final MaxComputeMetadataCacheMgr maxComputeMetadataCacheMgr; public ExternalMetaCacheMgr() { executor = ThreadPoolManager.newDaemonFixedThreadPool( @@ -63,6 +64,7 @@ public ExternalMetaCacheMgr() { hudiPartitionMgr = HudiPartitionMgr.get(executor); fsCache = new FileSystemCache(executor); icebergMetadataCacheMgr = new IcebergMetadataCacheMgr(); + maxComputeMetadataCacheMgr = new MaxComputeMetadataCacheMgr(); } public HiveMetaStoreCache getMetaStoreCache(HMSExternalCatalog catalog) { @@ -99,6 +101,10 @@ public IcebergMetadataCache getIcebergMetadataCache() { return icebergMetadataCacheMgr.getIcebergMetadataCache(); } + public MaxComputeMetadataCache getMaxComputeMetadataCache(long catalogId) { + return maxComputeMetadataCacheMgr.getMaxComputeMetadataCache(catalogId); + } + public FileSystemCache getFsCache() { return fsCache; } @@ -112,6 +118,7 @@ public void removeCache(long catalogId) { } hudiPartitionMgr.removePartitionProcessor(catalogId); icebergMetadataCacheMgr.removeCache(catalogId); + maxComputeMetadataCacheMgr.removeCache(catalogId); } public void invalidateTableCache(long catalogId, String dbName, String tblName) { @@ -126,6 +133,7 @@ public void invalidateTableCache(long catalogId, String dbName, String tblName) } hudiPartitionMgr.cleanTablePartitions(catalogId, dbName, tblName); icebergMetadataCacheMgr.invalidateTableCache(catalogId, dbName, tblName); + maxComputeMetadataCacheMgr.invalidateTableCache(catalogId, dbName, tblName); LOG.debug("invalid table cache for {}.{} in catalog {}", dbName, tblName, catalogId); } @@ -141,6 +149,7 @@ public void invalidateDbCache(long catalogId, String dbName) { } hudiPartitionMgr.cleanDatabasePartitions(catalogId, dbName); icebergMetadataCacheMgr.invalidateDbCache(catalogId, dbName); + maxComputeMetadataCacheMgr.invalidateDbCache(catalogId, dbName); LOG.debug("invalid db cache for {} in catalog {}", dbName, catalogId); } @@ -155,6 +164,7 @@ public void invalidateCatalogCache(long catalogId) { } hudiPartitionMgr.cleanPartitionProcess(catalogId); icebergMetadataCacheMgr.invalidateCatalogCache(catalogId); + maxComputeMetadataCacheMgr.invalidateCatalogCache(catalogId); LOG.debug("invalid catalog cache for {}", catalogId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeCacheKey.java new file mode 100644 index 00000000000000..441c2e84474aa4 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeCacheKey.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
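A brief usage sketch for the MaxComputeCacheKey class that follows (project, table, and partition values here are made up; MaxComputeMetadataCache later in this patch is the real consumer). The key identifies a (project, table) pair, optionally narrowed by a partition spec for row-count entries, and is meant for the Guava caches the surrounding code already builds:

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

final class CacheKeyUsageSketch {
    static void demo() {
        Cache<MaxComputeCacheKey, Long> rowCounts = CacheBuilder.newBuilder().build();
        // row counts are cached per (project, table, partitionSpec) triple
        rowCounts.put(new MaxComputeCacheKey("demo_project", "demo_table", "pt=20231201"), 42L);
        // an identical triple hits the cache; a different partition spec misses,
        // because equals() compares the spec whenever the lookup key carries one
        Long cached = rowCounts.getIfPresent(
                new MaxComputeCacheKey("demo_project", "demo_table", "pt=20231201"));
    }
}

Note that hashCode() below covers only (dbName, tblName), which keeps it consistent with the optional-spec equals(): spec-qualified and unqualified keys land in the same hash bucket and equals() makes the final call.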
+ +package org.apache.doris.datasource; + +import lombok.Data; + +import java.util.Objects; + +@Data +public class MaxComputeCacheKey { + private final String dbName; + private final String tblName; + private String partitionSpec; // optional + + public MaxComputeCacheKey(String dbName, String tblName) { + this(dbName, tblName, null); + } + + public MaxComputeCacheKey(String dbName, String tblName, String partitionSpec) { + this.dbName = dbName; + this.tblName = tblName; + this.partitionSpec = partitionSpec; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof MaxComputeCacheKey)) { + return false; + } + boolean partitionEquals = true; + if (partitionSpec != null) { + partitionEquals = partitionSpec.equals(((MaxComputeCacheKey) obj).partitionSpec); + } + return partitionEquals && dbName.equals(((MaxComputeCacheKey) obj).dbName) + && tblName.equals(((MaxComputeCacheKey) obj).tblName); + } + + @Override + public int hashCode() { + return Objects.hash(dbName, tblName); + } + + @Override + public String toString() { + return "TablePartitionKey{" + "dbName='" + dbName + '\'' + ", tblName='" + tblName + '\'' + '}'; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java index 0cd99678baded3..b361d0c8144fa2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeExternalCatalog.java @@ -24,22 +24,23 @@ import com.aliyun.odps.Odps; import com.aliyun.odps.OdpsException; -import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.Partition; import com.aliyun.odps.account.Account; import com.aliyun.odps.account.AliyunAccount; import com.aliyun.odps.tunnel.TableTunnel; -import com.aliyun.odps.tunnel.TunnelException; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.gson.annotations.SerializedName; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Optional; +import java.util.stream.Collectors; public class MaxComputeExternalCatalog extends ExternalCatalog { private Odps odps; + private TableTunnel tunnel; @SerializedName(value = "region") private String region; @SerializedName(value = "accessKey") @@ -93,23 +94,17 @@ protected void initLocalObjectsImpl() { } odps.setEndpoint(odpsUrl); odps.setDefaultProject(defaultProject); - } - - public long getTotalRows(String project, String table, Optional partitionSpec) throws TunnelException { - makeSureInitialized(); - TableTunnel tunnel = new TableTunnel(odps); + tunnel = new TableTunnel(odps); String tunnelUrl = tunnelUrlTemplate.replace("{}", region); if (enablePublicAccess) { tunnelUrl = tunnelUrl.replace("-inc", ""); } - TableTunnel.DownloadSession downloadSession; tunnel.setEndpoint(tunnelUrl); - if (!partitionSpec.isPresent()) { - downloadSession = tunnel.getDownloadSession(project, table, null); - } else { - downloadSession = tunnel.getDownloadSession(project, table, new PartitionSpec(partitionSpec.get()), null); - } - return downloadSession.getRecordCount(); + } + + public TableTunnel getTableTunnel() { + makeSureInitialized(); + return tunnel; } public Odps getClient() { @@ -139,6 +134,42 @@ public boolean tableExist(SessionContext ctx, String dbName, String tblName) { } } + public List listPartitionNames(String dbName, 
String tbl) { + return listPartitionNames(dbName, tbl, 0, -1); + } + + public List listPartitionNames(String dbName, String tbl, long skip, long limit) { + try { + if (getClient().projects().exists(dbName)) { + List parts; + if (limit < 0) { + parts = getClient().tables().get(tbl).getPartitions(); + } else { + skip = skip < 0 ? 0 : skip; + parts = new ArrayList<>(); + Iterator it = getClient().tables().get(tbl).getPartitionIterator(); + int count = 0; + while (it.hasNext()) { + if (count < skip) { + count++; + it.next(); + } else if (parts.size() >= limit) { + break; + } else { + parts.add(it.next()); + } + } + } + return parts.stream().map(p -> p.getPartitionSpec().toString(false, true)) + .collect(Collectors.toList()); + } else { + throw new OdpsException("Max compute project: " + dbName + " not exists."); + } + } catch (OdpsException e) { + throw new RuntimeException(e); + } + } + @Override public List listTableNames(SessionContext ctx, String dbName) { makeSureInitialized(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCache.java new file mode 100644 index 00000000000000..98b835813d9d8d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCache.java @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
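The MaxComputeMetadataCache class that follows hands Guava a Callable loader on each lookup, so the expensive tunnel or partition-listing call runs only on a cache miss. A compact, self-contained sketch of that load-through pattern (the key type, size, and expiry here are illustrative; the real class sizes its caches from Config values and a fixed constant):

import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

import java.util.concurrent.TimeUnit;

final class LoadThroughSketch {
    private final Cache<String, Long> cache = CacheBuilder.newBuilder()
            .maximumSize(10_000)
            .expireAfterAccess(60, TimeUnit.MINUTES)
            .build();

    long rowCount(String key) throws Exception {
        // get(key, loader) runs the Callable only when the key is absent and
        // caches its result; concurrent callers for the same key wait on one load
        return cache.get(key, () -> expensiveCount(key));
    }

    private long expensiveCount(String key) {
        return 0L; // stand-in for a TableTunnel download-session record count
    }
}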
+ +package org.apache.doris.datasource; + +import org.apache.doris.common.Config; +import org.apache.doris.planner.external.TablePartitionValues; + +import com.aliyun.odps.tunnel.TunnelException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; + +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +public class MaxComputeMetadataCache { + private final Cache partitionValuesCache; + private final Cache tableRowCountCache; + + public MaxComputeMetadataCache() { + partitionValuesCache = CacheBuilder.newBuilder().maximumSize(Config.max_hive_partition_cache_num) + .expireAfterAccess(Config.external_cache_expire_time_minutes_after_access, TimeUnit.MINUTES) + .build(); + tableRowCountCache = CacheBuilder.newBuilder().maximumSize(10000) + .expireAfterAccess(Config.external_cache_expire_time_minutes_after_access, TimeUnit.MINUTES) + .build(); + } + + public Long getCachedRowCount(String dbName, String tblName, String partitionSpec, + Callable loader) throws TunnelException { + try { + MaxComputeCacheKey tablePartitionKey = new MaxComputeCacheKey(dbName, tblName, partitionSpec); + return tableRowCountCache.get(tablePartitionKey, loader); + } catch (ExecutionException e) { + throw new TunnelException(e.getMessage(), e); + } + } + + public TablePartitionValues getCachedPartitionValues(MaxComputeCacheKey tablePartitionKey, + Callable loader) { + try { + return partitionValuesCache.get(tablePartitionKey, loader); + } catch (ExecutionException e) { + throw new RuntimeException("Fail to load partition values for table:" + + " '" + tablePartitionKey.getDbName() + "." + tablePartitionKey.getTblName() + "'"); + } + } + + public void cleanUp() { + partitionValuesCache.invalidateAll(); + tableRowCountCache.invalidateAll(); + } + + public void cleanDatabaseCache(String dbName) { + List removeCacheList = partitionValuesCache.asMap().keySet() + .stream() + .filter(k -> k.getDbName().equalsIgnoreCase(dbName)) + .collect(Collectors.toList()); + partitionValuesCache.invalidateAll(removeCacheList); + + List removeCacheRowCountList = tableRowCountCache.asMap().keySet() + .stream() + .filter(k -> k.getDbName().equalsIgnoreCase(dbName)) + .collect(Collectors.toList()); + tableRowCountCache.invalidateAll(removeCacheRowCountList); + } + + public void cleanTableCache(String dbName, String tblName) { + MaxComputeCacheKey cacheKey = new MaxComputeCacheKey(dbName, tblName); + partitionValuesCache.invalidate(cacheKey); + tableRowCountCache.invalidate(cacheKey); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCacheMgr.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCacheMgr.java new file mode 100644 index 00000000000000..72449b61949cd2 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/MaxComputeMetadataCacheMgr.java @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.datasource; + +import com.google.common.collect.Maps; + +import java.util.Map; + +public class MaxComputeMetadataCacheMgr { + + private static final Map maxComputeMetadataCaches = Maps.newConcurrentMap(); + + public MaxComputeMetadataCache getMaxComputeMetadataCache(long catalogId) { + MaxComputeMetadataCache cache = maxComputeMetadataCaches.get(catalogId); + if (cache == null) { + cache = new MaxComputeMetadataCache(); + maxComputeMetadataCaches.put(catalogId, cache); + } + return cache; + } + + public void removeCache(long catalogId) { + MaxComputeMetadataCache cache = maxComputeMetadataCaches.remove(catalogId); + if (cache != null) { + cache.cleanUp(); + } + } + + public void invalidateCatalogCache(long catalogId) { + MaxComputeMetadataCache cache = maxComputeMetadataCaches.get(catalogId); + if (cache != null) { + cache.cleanUp(); + } + } + + public void invalidateDbCache(long catalogId, String dbName) { + MaxComputeMetadataCache cache = maxComputeMetadataCaches.get(catalogId); + if (cache != null) { + cache.cleanDatabaseCache(dbName); + } + } + + public void invalidateTableCache(long catalogId, String dbName, String tblName) { + MaxComputeMetadataCache cache = maxComputeMetadataCaches.get(catalogId); + if (cache != null) { + cache.cleanTableCache(dbName, tblName); + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java index f3c2557a1debc1..c699be330a101a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/PooledHiveMetaStoreClient.java @@ -27,7 +27,6 @@ import com.amazonaws.glue.catalog.metastore.AWSCatalogMetastoreClient; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import org.apache.hadoop.hive.common.ValidReadTxnList; import org.apache.hadoop.hive.common.ValidReaderWriteIdList; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidTxnWriteIdList; @@ -126,9 +125,15 @@ public boolean tableExists(String dbName, String tblName) { } public List listPartitionNames(String dbName, String tblName) { + return listPartitionNames(dbName, tblName, MAX_LIST_PARTITION_NUM); + } + + public List listPartitionNames(String dbName, String tblName, long max) { + // list all parts when the limit is greater than the short maximum + short limited = max <= Short.MAX_VALUE ? 
(short) max : MAX_LIST_PARTITION_NUM; try (CachedClient client = getClient()) { try { - return client.client.listPartitionNames(dbName, tblName, MAX_LIST_PARTITION_NUM); + return client.client.listPartitionNames(dbName, tblName, limited); } catch (Exception e) { client.setThrowable(e); throw e; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java index 9468cc42881724..85245de1020ddd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileQueryScanNode.java @@ -339,6 +339,8 @@ public void createScanRangeLocations() throws UserException { PaimonScanNode.setPaimonParams(rangeDesc, (PaimonSplit) fileSplit); } else if (fileSplit instanceof HudiSplit) { HudiScanNode.setHudiParams(rangeDesc, (HudiSplit) fileSplit); + } else if (fileSplit instanceof MaxComputeSplit) { + MaxComputeScanNode.setScanParams(rangeDesc, (MaxComputeSplit) fileSplit); } curLocations.getScanRange().getExtScanRange().getFileScanRange().addToRanges(rangeDesc); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java index d7f8d599a61555..ae0b424ad815af 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeScanNode.java @@ -18,26 +18,33 @@ package org.apache.doris.planner.external; import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.PartitionItem; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.external.MaxComputeExternalTable; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.datasource.MaxComputeExternalCatalog; +import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.spi.Split; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TFileFormatType; +import org.apache.doris.thrift.TFileRangeDesc; import org.apache.doris.thrift.TFileType; +import org.apache.doris.thrift.TMaxComputeFileDesc; +import org.apache.doris.thrift.TTableFormatFileDesc; +import com.aliyun.odps.Table; import com.aliyun.odps.tunnel.TunnelException; import org.apache.hadoop.fs.Path; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; public class MaxComputeScanNode extends FileQueryScanNode { @@ -56,6 +63,17 @@ public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeNa catalog = (MaxComputeExternalCatalog) table.getCatalog(); } + public static void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeSplit) { + TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); + tableFormatFileDesc.setTableFormatType(TableFormatType.MAX_COMPUTE.value()); + TMaxComputeFileDesc fileDesc = new TMaxComputeFileDesc(); + if (maxComputeSplit.getPartitionSpec().isPresent()) { + fileDesc.setPartitionSpec(maxComputeSplit.getPartitionSpec().get()); + } + tableFormatFileDesc.setMaxComputeParams(fileDesc); + rangeDesc.setTableFormatParams(tableFormatFileDesc); + } + @Override 
protected TFileType getLocationType() throws UserException { return getLocationType(null); @@ -89,43 +107,92 @@ protected Map getLocationProperties() throws UserException { @Override protected List getSplits() throws UserException { List result = new ArrayList<>(); - // String splitPath = catalog.getTunnelUrl(); - // TODO: use single max compute scan node rather than file scan node com.aliyun.odps.Table odpsTable = table.getOdpsTable(); if (desc.getSlots().isEmpty() || odpsTable.getFileNum() <= 0) { return result; } try { - List> sliceRange = new ArrayList<>(); - Optional partitionSpec = table.getPartitionSpec(conjuncts); - long totalRows = catalog.getTotalRows(table.getDbName(), table.getName(), partitionSpec); - long fileNum = odpsTable.getFileNum(); - long start = 0; - long splitSize = (long) Math.ceil((double) totalRows / fileNum); - if (splitSize <= 0 || totalRows < MIN_SPLIT_SIZE) { - // use whole split - sliceRange.add(Pair.of(start, totalRows)); - } else { - for (int i = 0; i < fileNum; i++) { - if (start > totalRows) { - break; - } - sliceRange.add(Pair.of(start, splitSize)); - start += splitSize; + if (!table.getPartitionNames().isEmpty()) { + if (conjuncts.isEmpty()) { + throw new IllegalArgumentException("Max Compute partition table need partition predicate."); } - } - long modificationTime = odpsTable.getLastDataModifiedTime().getTime(); - if (!sliceRange.isEmpty()) { - for (int i = 0; i < sliceRange.size(); i++) { - Pair range = sliceRange.get(i); - result.add(new FileSplit(new Path("/virtual_slice_" + i), range.first, range.second, - totalRows, modificationTime, null, Collections.emptyList())); + List partitionSpecs = getPartitionSpecs(); + for (String partitionSpec : partitionSpecs) { + addPartitionSplits(result, odpsTable, partitionSpec); } + } else { + addBatchSplits(result, odpsTable, table.getTotalRows()); } } catch (TunnelException e) { - throw new UserException("Max Compute tunnel SDK exception.", e); + throw new UserException("Max Compute tunnel SDK exception: " + e.getMessage(), e); } return result; } + + private static void addPartitionSplits(List result, Table odpsTable, String partitionSpec) { + long modificationTime = odpsTable.getLastDataModifiedTime().getTime(); + // use '-1' to read whole partition, avoid expending too much time on calling table.getTotalRows() + Pair range = Pair.of(0L, -1L); + FileSplit rangeSplit = new FileSplit(new Path("/virtual_slice_part"), + range.first, range.second, -1, modificationTime, null, Collections.emptyList()); + result.add(new MaxComputeSplit(partitionSpec, rangeSplit)); + } + + private static void addBatchSplits(List result, Table odpsTable, long totalRows) { + List> sliceRange = new ArrayList<>(); + long fileNum = odpsTable.getFileNum(); + long start = 0; + long splitSize = (long) Math.ceil((double) totalRows / fileNum); + if (splitSize <= 0 || totalRows < MIN_SPLIT_SIZE) { + // use whole split + sliceRange.add(Pair.of(start, totalRows)); + } else { + for (int i = 0; i < fileNum; i++) { + if (start > totalRows) { + break; + } + sliceRange.add(Pair.of(start, splitSize)); + start += splitSize; + } + } + long modificationTime = odpsTable.getLastDataModifiedTime().getTime(); + if (!sliceRange.isEmpty()) { + for (int i = 0; i < sliceRange.size(); i++) { + Pair range = sliceRange.get(i); + FileSplit rangeSplit = new FileSplit(new Path("/virtual_slice_" + i), + range.first, range.second, totalRows, modificationTime, null, Collections.emptyList()); + result.add(new MaxComputeSplit(rangeSplit)); + } + } + } + + private List 
+    private List<String> getPartitionSpecs() throws AnalysisException {
+        return getPrunedPartitionSpecs();
+    }
+
+    private List<String> getPrunedPartitionSpecs() throws AnalysisException {
+        List<String> result = new ArrayList<>();
+        TablePartitionValues partitionValues = table.getPartitionValues();
+        // prune partitions by expr
+        partitionValues.readLock().lock();
+        try {
+            Map<Long, PartitionItem> idToPartitionItem = partitionValues.getIdToPartitionItem();
+            this.totalPartitionNum = idToPartitionItem.size();
+            ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem,
+                    table.getPartitionColumns(), columnNameToRange,
+                    partitionValues.getUidToPartitionRange(),
+                    partitionValues.getRangeToId(),
+                    partitionValues.getSingleColumnRangeMap(),
+                    false);
+            Collection<Long> filteredPartitionIds = pruner.prune();
+            this.readPartitionNum = filteredPartitionIds.size();
+            // get partitions from cache
+            Map<Long, String> partitionIdToNameMap = partitionValues.getPartitionIdToNameMap();
+            filteredPartitionIds.forEach(id -> result.add(partitionIdToNameMap.get(id)));
+            return result;
+        } finally {
+            partitionValues.readLock().unlock();
+        }
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeSplit.java
new file mode 100644
index 00000000000000..a14e5fe22a6375
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/MaxComputeSplit.java
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.planner.external;
+
+import java.util.Optional;
+
+public class MaxComputeSplit extends FileSplit {
+    private final Optional<String> partitionSpec;
+
+    public MaxComputeSplit(FileSplit rangeSplit) {
+        super(rangeSplit.path, rangeSplit.start, rangeSplit.length, rangeSplit.fileLength,
+                rangeSplit.hosts, rangeSplit.partitionValues);
+        this.partitionSpec = Optional.empty();
+    }
+
+    public MaxComputeSplit(String partitionSpec, FileSplit rangeSplit) {
+        super(rangeSplit.path, rangeSplit.start, rangeSplit.length, rangeSplit.fileLength,
+                rangeSplit.hosts, rangeSplit.partitionValues);
+        this.partitionSpec = Optional.of(partitionSpec);
+    }
+
+    public Optional<String> getPartitionSpec() {
+        return partitionSpec;
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TableFormatType.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TableFormatType.java
index 891e138db6b17d..b5f41f97ba4d43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TableFormatType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TableFormatType.java
@@ -22,6 +22,7 @@ public enum TableFormatType {
     ICEBERG("iceberg"),
     HUDI("hudi"),
     PAIMON("paimon"),
+    MAX_COMPUTE("max_compute"),
     TRANSACTIONAL_HIVE("transactional_hive");
 
     private final String tableFormatType;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TablePartitionValues.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TablePartitionValues.java
index a207f5f082a180..acd44a50900a78 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/TablePartitionValues.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/TablePartitionValues.java
@@ -218,11 +218,16 @@ private List<String> getHivePartitionValues(String partitionName) {
 
     @Data
     public static class TablePartitionKey {
-        private String dbName;
-        private String tblName;
+        private final String dbName;
+        private final String tblName;
         // not in key
         private List<Type> types;
 
+        public TablePartitionKey(String dbName, String tblName) {
+            this.dbName = dbName;
+            this.tblName = tblName;
+        }
+
         public TablePartitionKey(String dbName, String tblName, List<Type> types) {
             this.dbName = dbName;
             this.tblName = tblName;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiCachedPartitionProcessor.java
index 37225c2339cd71..ba793ecf407772 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiCachedPartitionProcessor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiCachedPartitionProcessor.java
@@ -96,11 +96,11 @@ public TablePartitionValues getSnapshotPartitionValues(HMSExternalTable table,
         if (Long.parseLong(timestamp) == lastTimestamp) {
             return getPartitionValues(table, tableMetaClient);
         }
-        List<String> partitionNames = getPartitionNamesBeforeOrEquals(timeline, timestamp);
-        List<String> partitionColumnsList = Arrays.asList(partitionColumns.get());
+        List<String> partitionNameAndValues = getPartitionNamesBeforeOrEquals(timeline, timestamp);
+        List<String> partitionNames = Arrays.asList(partitionColumns.get());
         TablePartitionValues partitionValues = new TablePartitionValues();
-        partitionValues.addPartitions(partitionNames,
-                partitionNames.stream().map(p -> parsePartitionValues(partitionColumnsList, p))
+        partitionValues.addPartitions(partitionNameAndValues,
+                partitionNameAndValues.stream().map(p -> parsePartitionValues(partitionNames, p))
                         .collect(Collectors.toList()), table.getPartitionColumnTypes());
         return partitionValues;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index 45f3fc8030dcee..adb344cd897c46 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -25,6 +25,7 @@ import org.apache.doris.analysis.AdminShowTabletStorageFormatStmt;
 import org.apache.doris.analysis.DescribeStmt;
 import org.apache.doris.analysis.HelpStmt;
+import org.apache.doris.analysis.LimitElement;
 import org.apache.doris.analysis.PartitionNames;
 import org.apache.doris.analysis.ShowAlterStmt;
 import org.apache.doris.analysis.ShowAnalyzeStmt;
@@ -179,6 +180,7 @@ import org.apache.doris.common.util.Util;
 import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.datasource.HMSExternalCatalog;
+import org.apache.doris.datasource.MaxComputeExternalCatalog;
 import org.apache.doris.external.iceberg.IcebergTableCreationRecord;
 import org.apache.doris.load.DeleteHandler;
 import org.apache.doris.load.ExportJob;
@@ -1616,18 +1618,49 @@ private void handleShowPartitions() throws AnalysisException {
             List<List<String>> rows = ((PartitionsProcDir) procNodeI).fetchResultByFilter(showStmt.getFilterMap(),
                     showStmt.getOrderByPairs(), showStmt.getLimitElement()).getRows();
             resultSet = new ShowResultSet(showStmt.getMetaData(), rows);
+        } else if (showStmt.getCatalog() instanceof MaxComputeExternalCatalog) {
+            handleShowMaxComputeTablePartitions(showStmt);
         } else {
             handleShowHMSTablePartitions(showStmt);
         }
     }
 
+    private void handleShowMaxComputeTablePartitions(ShowPartitionsStmt showStmt) {
+        MaxComputeExternalCatalog catalog = (MaxComputeExternalCatalog) (showStmt.getCatalog());
+        List<List<String>> rows = new ArrayList<>();
+        String dbName = ClusterNamespace.getNameFromFullName(showStmt.getTableName().getDb());
+        List<String> partitionNames;
+        LimitElement limit = showStmt.getLimitElement();
+        if (limit != null && limit.hasLimit()) {
+            partitionNames = catalog.listPartitionNames(dbName,
+                    showStmt.getTableName().getTbl(), limit.getOffset(), limit.getLimit());
+        } else {
+            partitionNames = catalog.listPartitionNames(dbName, showStmt.getTableName().getTbl());
+        }
+        for (String partition : partitionNames) {
+            List<String> list = new ArrayList<>();
+            list.add(partition);
+            rows.add(list);
+        }
+        // sort by partition name
+        rows.sort(Comparator.comparing(x -> x.get(0)));
+        resultSet = new ShowResultSet(showStmt.getMetaData(), rows);
+    }
+
     private void handleShowHMSTablePartitions(ShowPartitionsStmt showStmt) {
         HMSExternalCatalog catalog = (HMSExternalCatalog) (showStmt.getCatalog());
         List<List<String>> rows = new ArrayList<>();
         String dbName = ClusterNamespace.getNameFromFullName(showStmt.getTableName().getDb());
-        List<String> partitionNames = catalog.getClient().listPartitionNames(dbName,
-                showStmt.getTableName().getTbl());
+        List<String> partitionNames;
+        LimitElement limit = showStmt.getLimitElement();
+        if (limit != null && limit.hasLimit()) {
+            // only limit is valid on Hive: the HMS API supports a max count but no offset
+            partitionNames = catalog.getClient()
+                    .listPartitionNames(dbName, showStmt.getTableName().getTbl(), limit.getLimit());
+        } else {
+            partitionNames = catalog.getClient().listPartitionNames(dbName, showStmt.getTableName().getTbl());
+        }
         for (String partition : partitionNames) {
             List<String> list = new ArrayList<>();
             list.add(partition);
diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift
index dc6398089535cc..cd0310c8658ff3 100644
--- a/gensrc/thrift/Descriptors.thrift
+++ b/gensrc/thrift/Descriptors.thrift
@@ -328,7 +328,6 @@ struct TMCTable {
     4: optional string access_key
     5: optional string secret_key
     6: optional string public_access
-    7: optional string partition_spec
 }
 
 // "Union" of all table types.
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index e32092994ef0cd..a57745e78d3488 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -310,6 +310,9 @@ struct TPaimonFileDesc {
     10: optional i64 last_update_time
 }
 
+struct TMaxComputeFileDesc {
+    1: optional string partition_spec
+}
 
 struct THudiFileDesc {
     1: optional string instant_time;
@@ -340,6 +343,7 @@ struct TTableFormatFileDesc {
     3: optional THudiFileDesc hudi_params
     4: optional TPaimonFileDesc paimon_params
     5: optional TTransactionalHiveDesc transactional_hive_params
+    6: optional TMaxComputeFileDesc max_compute_params
 }
 
 enum TTextSerdeType {
diff --git a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
index 6cd91cf2ee30e1..e75e12c137b7a2 100644
--- a/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
+++ b/regression-test/data/external_table_p2/maxcompute/test_external_catalog_maxcompute.out
@@ -20,8 +20,50 @@ true    77      8920    182239402452
 9601    qewtoll 2020-09-21
 
 -- !q7 --
-6223    maxam   2020-09-21
-9601    qewtoll 2020-09-21
+1633    siwtow  2021-08-21
+1633    siwtow  20210821
+1633    siwtow  20210921
 
 -- !replay_q6 --
 9601    qewtoll 2020-09-21
+
+-- !multi_partition_q1 --
+pt=13/yy=2021/mm=12/dd=22
+pt=14/yy=2021/mm=12/dd=21
+pt=14/yy=2021/mm=12/dd=22
+
+-- !multi_partition_q2 --
+17      2022-04-23T11:12:30     2021    12      22
+17      2022-04-23T11:12:30     2021    12      21
+16      2022-04-23T11:12:30     2021    12      22
+
+-- !multi_partition_q3 --
+14      2022-04-23T11:12:30     2022    01      01
+14      2022-04-23T11:12:30     2022    01      02
+98      2022-04-23T11:12:30     2021    12      21
+
+-- !multi_partition_q4 --
+22
+
+-- !multi_partition_q5 --
+2022-04-23T11:12:30     2021    12      21
+2022-04-23T11:12:30     2021    12      21
+2022-04-23T11:12:30     2021    12      21
+
+-- !multi_partition_q6 --
+17      2021    12
+
+-- !multi_partition_q7 --
+20
+
+-- !multi_partition_q8 --
+11
+
+-- !multi_partition_q9 --
+lweu    8920    true    2023-11-23T12:03:54.952 0.123   2022-04-23      2022-04-23T11:12:30     12      2021    12      22
+wert    8920    true    2023-11-23T12:05:01.693 0.123   2022-04-23      2022-04-23T11:12:30     12      2021    12      22
+
+-- !multi_partition_q10 --
+12      2021    12      21
+12      2021    12      22
+12      2021    12      22
diff --git a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
index 23d0c0b252de32..c016f8b91f2e97 100644
--- a/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
+++ b/regression-test/suites/external_table_p2/maxcompute/test_external_catalog_maxcompute.groovy
@@ -49,7 +49,7 @@ suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot
         qt_q4 """ select * from mc_parts where dt = '2020-09-21' """
         qt_q5 """ select * from mc_parts where dt = '2021-08-21' """
         qt_q6 """ select * from mc_parts where dt = '2020-09-21' and mc_bigint > 6223 """
-        qt_q7 """ select * from mc_parts where dt = '2020-09-21' or mc_bigint > 0 """
+        qt_q7 """ select * from mc_parts where dt = '2020-09-21' or (mc_bigint > 0 and dt > '2020-09-20') order by mc_bigint, dt limit 3; """
     }
 
     sql """ switch `${mc_catalog_name}`; """
suite("test_external_catalog_maxcompute", "p2,external,maxcompute,external_remot sql """ switch `${mc_catalog_name}`; """ sql """ use `${mc_db}`; """ qt_replay_q6 """ select * from mc_parts where dt = '2020-09-21' and mc_bigint > 6223 """ + + // test multi partitions prune + sql """ refresh catalog ${mc_catalog_name} """ + sql """ switch `${mc_catalog_name}`; """ + sql """ use `${mc_db}`; """ + qt_multi_partition_q1 """ show partitions from multi_partitions limit 5,3; """ + qt_multi_partition_q2 """ select pt, create_time, yy, mm, dd from multi_partitions where pt>-1 and yy > '' and mm > '' and dd >'' order by pt desc, dd desc limit 3; """ + qt_multi_partition_q3 """ select sum(pt), create_time, yy, mm, dd from multi_partitions where yy > '' and mm > '' and dd >'' group by create_time, yy, mm, dd order by dd limit 3; """ + qt_multi_partition_q4 """ select count(*) from multi_partitions where pt>-1 and yy > '' and mm > '' and dd <= '30'; """ + qt_multi_partition_q5 """ select create_time, yy, mm, dd from multi_partitions where yy = '2021' and mm='12' and dd='21' order by pt limit 3; """ + qt_multi_partition_q6 """ select max(pt), yy, mm from multi_partitions where yy = '2021' and mm='12' group by yy, mm order by yy, mm; """ + qt_multi_partition_q7 """ select count(*) from multi_partitions where yy < '2022'; """ + qt_multi_partition_q8 """ select count(*) from multi_partitions where pt>=14; """ + qt_multi_partition_q9 """ select city,mnt,gender,finished_time,order_rate,cut_date,create_time,pt, yy, mm, dd from multi_partitions where pt >= 12 and pt < 14 and finished_time is not null; """ + qt_multi_partition_q10 """ select pt, yy, mm, dd from multi_partitions where pt >= 12 and create_time > '2022-04-23 11:11:00' order by pt, yy, mm, dd limit 3; """ } } From 264735b87576835b3f6715ebb63bc63f923769bc Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Sun, 3 Dec 2023 14:45:10 +0800 Subject: [PATCH 49/50] [fix](Nereids) should not push down project to the nullable side of outer join #27912 (#27913) --- .../PushdownProjectThroughInnerOuterJoin.java | 4 +- ...hdownProjectThroughInnerOuterJoinTest.java | 53 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoin.java index 03fd3e8a8d8667..cf907e9578689a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoin.java @@ -136,8 +136,8 @@ private Plan pushdownProject(LogicalProject> p return null; } // we could not push nullable side project - if ((join.getJoinType().isLeftOuterJoin() && rightContains) - || (join.getJoinType().isRightOuterJoin() && leftContains)) { + if (((join.getJoinType().isLeftOuterJoin() || join.getJoinType().isFullOuterJoin()) && rightContains) + || ((join.getJoinType().isRightOuterJoin() || join.getJoinType().isFullOuterJoin()) && leftContains)) { return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoinTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoinTest.java index 19be848332d4fa..5e539202d7a70e 
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoinTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/PushdownProjectThroughInnerOuterJoinTest.java
@@ -73,6 +73,59 @@ public void pushBothSide() {
         );
     }
 
+    @Test
+    public void pushRightSide() {
+        // project (t1.id + 1) as alias, t1.name, t2.name
+        List<NamedExpression> projectExprs = ImmutableList.of(
+                new Alias(new Add(scan1.getOutput().get(0), Literal.of(1)), "alias"),
+                scan1.getOutput().get(1),
+                scan2.getOutput().get(1)
+        );
+        // complex projection contains t1.id, which isn't in the join condition
+        LogicalPlan plan = new LogicalPlanBuilder(scan1)
+                .join(scan2, JoinType.LEFT_OUTER_JOIN, Pair.of(1, 1))
+                .projectExprs(projectExprs)
+                .join(scan3, JoinType.INNER_JOIN, Pair.of(1, 1))
+                .build();
+
+        PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+                .applyExploration(PushdownProjectThroughInnerOuterJoin.INSTANCE.buildRules())
+                .printlnOrigin()
+                .printlnExploration()
+                .matchesExploration(
+                        logicalJoin(
+                                logicalProject(
+                                        logicalJoin(
+                                                logicalProject().when(project -> project.getProjects().size() == 2),
+                                                logicalOlapScan()
+                                        )
+                                ),
+                                logicalOlapScan()
+                        )
+                );
+    }
+
+    @Test
+    public void pushNoSide() {
+        // project (t1.id + 1) as alias, t1.name, t2.name
+        List<NamedExpression> projectExprs = ImmutableList.of(
+                new Alias(new Add(scan1.getOutput().get(0), Literal.of(1)), "alias"),
+                scan1.getOutput().get(1),
+                scan2.getOutput().get(1)
+        );
+        // complex projection contains t1.id, which isn't in the join condition
+        LogicalPlan plan = new LogicalPlanBuilder(scan1)
+                .join(scan2, JoinType.FULL_OUTER_JOIN, Pair.of(1, 1))
+                .projectExprs(projectExprs)
+                .join(scan3, JoinType.INNER_JOIN, Pair.of(1, 1))
+                .build();
+
+        int plansNumber = PlanChecker.from(MemoTestUtils.createConnectContext(), plan)
+                .applyExploration(PushdownProjectThroughInnerOuterJoin.INSTANCE.buildRules())
+                .plansNumber();
+        Assertions.assertEquals(1, plansNumber);
+    }
+
     @Test
     public void pushdownProjectInCondition() {
         // project (t1.id + 1) as alias, t1.name, (t2.id + 1) as alias, t2.name

From 8b66024bdce5d40e01a239d1c3c600207fe06a4c Mon Sep 17 00:00:00 2001
From: eldenmoon <15605149486@163.com>
Date: Sun, 3 Dec 2023 23:08:49 +0800
Subject: [PATCH 50/50] fix compile

---
 .../src/main/java/org/apache/doris/qe/ShowExecutor.java | 1 -
 .../java/org/apache/doris/statistics/AnalysisInfo.java  | 7 +++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
index adb344cd897c46..a2562287d29a92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java
@@ -2912,4 +2912,3 @@ private void handleShowConvertLSC() {
     }
 
 }
-
diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
index 4a10e921efbcf1..8b0ccfbdb80069 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisInfo.java
@@ -188,6 +188,9 @@ public enum ScheduleType {
     @SerializedName("createTime")
     public final long createTime = System.currentTimeMillis();
 
+    @SerializedName("startTime")
+    public long startTime;
+
     @SerializedName("endTime")
     public long endTime;
 
@@ -327,6 +330,10 @@ public static AnalysisInfo read(DataInput dataInput) throws IOException {
         return analysisInfo;
     }
 
+    public void markStartTime(long startTime) {
+        this.startTime = startTime;
+    }
+
     public void markFinished() {
         state = AnalysisState.FINISHED;
         endTime = System.currentTimeMillis();
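
[Editor's note] The AnalysisInfo change in the patch above splits analysis-job timing into three fields: createTime (assigned when the AnalysisInfo object is constructed), the new startTime (assigned via markStartTime() when execution actually begins), and endTime (assigned by markFinished()). The sketch below only illustrates the intended call order; the runAnalysisJob and doAnalyze names are hypothetical and not part of the patch:

    // Hypothetical driver -- shows when the new markStartTime() is meant to be called.
    void runAnalysisJob(AnalysisInfo info) {
        // createTime was already set when the AnalysisInfo was built,
        // so startTime - createTime measures time spent queued.
        info.markStartTime(System.currentTimeMillis());
        doAnalyze(info);     // hypothetical job body
        info.markFinished(); // sets state = FINISHED and endTime
    }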