From b1a26d02d474059769892c87c3a2e6e2588c4999 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 18 Jun 2024 16:12:16 +0800 Subject: [PATCH] [Fix](segment iterator) fix shrink non-char column coredump (#36275) If we execute a delete predicate on a table with an inverted index and then query it, the query causes a core dump in shrink_char_type_column_suffix_zero. This occurs because the delete predicate's column index in _char_type_idx is incorrectly shifted onto the result column inserted for the inverted index result, which is not a CHAR column. The core dump stack looks like: ``` COULD NOT CREATE A LOGGINGFILE 20240604-145331!F20240604 14:53:31.991016 24178 column.h:134] Cannot get_shrinked_column() column Const(UInt8) *** Check failure stack trace: *** F20240604 14:53:31.991436 24158 column.h:134] Cannot get_shrinked_column() column Const(UInt8) *** Check failure stack trace: *** F20240604 14:53:31.991436 24158 column.h:134] Cannot get_shrinked_column() column Const(UInt8)F20240604 14:53:31.991645 24160 column.h:134] Cannot get_shrinked_column() column Const(UInt8) #0 0x00007fa0bf784387 in ?? () from /mnt/disk2/tengjianping/local/ldb_toolchain/lib/libc.so.6 #1 0x0000557dc8e617a0 in google::LogMessage::Flush() () #2 0x0000557dc8e65599 in google::LogMessageFatal::~LogMessageFatal() () #3 0x0000557dbe4ec939 in doris::vectorized::IColumn::get_shrinked_column (this=0x7fa094ac16a0) at /home/zcp/repo_center/doris_release/doris/be/src/vec/columns/column.h:134 #4 0x0000557dc236d8a2 in doris::vectorized::Block::shrink_char_type_column_suffix_zero (this=0x7f9a372a4b70, char_type_idx=...) 
at /home/zcp/repo_center/doris_release/doris/be/src/vec/core/block.cpp:1126 #5 0x0000557dbee9ea95 in doris::segment_v2::SegmentIterator::_next_batch_internal (this=, block=0x7f9a372a4b70) at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2408 #6 0x0000557dbee9cc5a in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const (this=) at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2090 #7 doris::segment_v2::SegmentIterator::next_batch (this=0x1c5d, block=0x6) at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2089 #8 0x0000557dbeebbb44 in doris::BetaRowsetReader::next_block (this=0x7f9a3a041600, block=0x7f9a372a4b70) at /home/zcp/repo_center/doris_release/doris/be/src/olap/rowset/beta_rowset_reader.cpp:342 #9 0x0000557dc8590ddd in doris::vectorized::VCollectIterator::Level0Iterator::_refresh (this=0x7f9a372a1be0) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.h:256 #10 doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row (this=0x7f9a372a1be0) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:514 #11 0x0000557dc8591055 in doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref (this=0x1c5d) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:493 #12 0x0000557dc85934f2 in doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref (this=0x7fa090e36c00) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:692 #13 0x0000557dc858ddc9 in doris::vectorized::VCollectIterator::build_heap (this=0x7f9d80976dc0, rs_readers=...) 
at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/vcollect_iterator.cpp:186 #14 0x0000557dc857e808 in doris::vectorized::BlockReader::_init_collect_iter (this=this@entry=0x7f9d80976800, read_params=...) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/block_reader.cpp:156 #15 0x0000557dc857f64d in doris::vectorized::BlockReader::init (this=, read_params=...) at /home/zcp/repo_center/doris_release/doris/be/src/vec/olap/block_reader.cpp:228 #16 0x0000557dc370dc2c in doris::vectorized::NewOlapScanner::open (this=0x7f9a2aa68210, state=) --Type for more, q to quit, c to continue without paging--c at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:227 #17 0x0000557dc3718d89 in doris::vectorized::ScannerScheduler::_scanner_scan (ctx=std::shared_ptr (use count 4, weak count 1) = {...}, scan_task=std::shared_ptr (use count 2, weak count 0) = {...}) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:242 #18 0x0000557dc3719b77 in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const (this=0x0) at /home/zcp/repo_center/doris_release/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:158 #19 std::__invoke_impl, std::shared_ptr)::$_1&>(std::__invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1&) (__f=...) at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61 #20 std::__invoke_r, std::shared_ptr)::$_1&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1&) (__fn=...) at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111 #21 std::_Function_handler, std::shared_ptr)::$_1>::_M_invoke(std::_Any_data const&) (__functor=...) 
at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291 #22 0x0000557dbf3c7788 in doris::ThreadPool::dispatch_thread (this=0x7f9d0bbdc400) at /home/zcp/repo_center/doris_release/doris/be/src/util/threadpool.cpp:543 #23 0x0000557dbf3bcb41 in std::function::operator()() const (this=0x1471) at /var/local/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560 #24 doris::Thread::supervise_thread (arg=0x7f9d0bbdf4c0) at /home/zcp/repo_center/doris_release/doris/be/src/util/thread.cpp:498 #25 0x00007fa0bee1dea5 in sem_open () from /mnt/disk2/tengjianping/local/ldb_toolchain/lib/libpthread.so.0 ``` --- .../rowset/segment_v2/segment_iterator.cpp | 28 +++++---- .../olap/rowset/segment_v2/segment_iterator.h | 2 +- .../data/inverted_index_p0/test_delete.out | 4 ++ .../inverted_index_p0/test_delete.groovy | 60 +++++++++++++++++++ 4 files changed, 81 insertions(+), 13 deletions(-) create mode 100644 regression-test/data/inverted_index_p0/test_delete.out create mode 100644 regression-test/suites/inverted_index_p0/test_delete.groovy diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 9ac090532f51b5..eb8bb12294c404 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -321,10 +321,6 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) { } RETURN_IF_ERROR(init_iterators()); - if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) { - _is_char_type.resize(_schema->columns().size(), false); - _vec_init_char_column_id(); - } if (opts.output_columns != nullptr) { _output_columns = *(opts.output_columns); @@ -1843,19 +1839,23 @@ bool SegmentIterator::_has_char_type(const Field& column_desc) { } }; -void SegmentIterator::_vec_init_char_column_id() { +void SegmentIterator::_vec_init_char_column_id(vectorized::Block* block) { for 
(size_t i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); const Field* column_desc = _schema->column(cid); - if (_has_char_type(*column_desc)) { - _char_type_idx.emplace_back(i); - if (i != 0) { - _char_type_idx_no_0.emplace_back(i); + // The additional deleted filter condition will be in the materialized column at the end of the block. + // After _output_column_by_sel_idx, it will be erased, so we do not need to shrink it. + if (i < block->columns()) { + if (_has_char_type(*column_desc)) { + _char_type_idx.emplace_back(i); + if (i != 0) { + _char_type_idx_no_0.emplace_back(i); + } + } + if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { + _is_char_type[cid] = true; } - } - if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { - _is_char_type[cid] = true; } } } @@ -2292,6 +2292,10 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) { } _current_return_columns.resize(_schema->columns().size()); _converted_column_ids.resize(_schema->columns().size(), 0); + if (_char_type_idx.empty() && _char_type_idx_no_0.empty()) { + _is_char_type.resize(_schema->columns().size(), false); + _vec_init_char_column_id(block); + } for (size_t i = 0; i < _schema->num_column_ids(); i++) { auto cid = _schema->column_id(i); auto column_desc = _schema->column(cid); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index 7e08bfefa6c28b..0a7b533130148d 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -207,7 +207,7 @@ class SegmentIterator : public RowwiseIterator { // TODO: Fix Me // CHAR type in storage layer padding the 0 in length. But query engine need ignore the padding 0. // so segment iterator need to shrink char column before output it. only use in vec query engine. 
- void _vec_init_char_column_id(); + void _vec_init_char_column_id(vectorized::Block* block); bool _has_char_type(const Field& column_desc); uint32_t segment_id() const { return _segment->id(); } diff --git a/regression-test/data/inverted_index_p0/test_delete.out b/regression-test/data/inverted_index_p0/test_delete.out new file mode 100644 index 00000000000000..4ee136aef2b9c5 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_delete.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2 + diff --git a/regression-test/suites/inverted_index_p0/test_delete.groovy b/regression-test/suites/inverted_index_p0/test_delete.groovy new file mode 100644 index 00000000000000..3d6100ca4fe9ec --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_delete.groovy @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_delete"){ + // prepare test table + + def timeout = 60000 + def delta_time = 1000 + def alter_res = "null" + def useTime = 0 + + def indexTblName = "test_delete" + + sql "DROP TABLE IF EXISTS ${indexTblName}" + // create 1 replica table + sql """ + CREATE TABLE `${indexTblName}` ( + `a` int NULL COMMENT '', + `b` varchar(60) NOT NULL COMMENT '', + `c` char(10) NULL COMMENT '', + INDEX index_b(b) USING INVERTED COMMENT '', + INDEX index_c(c) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + + sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', '6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', '48a33ec3453a28bce84b8f96fe161956', 'bbb'), + ('3', '021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 'ee27ee1da291e46403c408e220bed6e1', 'ddd'), + ('5', 'a648a447b8f71522f11632eba4b4adde', 'eee'), ('6', 'a9fb5c985c90bf05f3bee5ca3ae95260', 'fff'), + ('7', '0974e7a82e30d1af83205e474fadd0a2', 'ggg'); """ + + + sql """ DELETE FROM ${indexTblName} WHERE c IN ('aaa','ccc'); """ + + qt_sql """ SELECT count(1) as cnt FROM ${indexTblName} WHERE a BETWEEN 1 AND 6 AND b IN ('48a33ec3453a28bce84b8f96fe161956', 'a9fb5c985c90bf05f3bee5ca3ae95260'); """ +} \ No newline at end of file