diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index 3c60ccc89c7731..71dc3f6e663ab1 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -312,13 +312,22 @@ struct CommonFindOp { void find_batch(const BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, uint8_t* results) const { + const T* __restrict data = nullptr; + const uint8_t* __restrict nullmap = nullptr; if (column->is_nullable()) { const auto* nullable = assert_cast(column.get()); - const auto& nullmap = - assert_cast(nullable->get_null_map_column()) - .get_data(); + if (nullable->has_null()) { + nullmap = + assert_cast(nullable->get_null_map_column()) + .get_data() + .data(); + } + data = (T*)nullable->get_nested_column().get_raw_data().data; + } else { + data = (T*)column->get_raw_data().data; + } - const T* data = (T*)nullable->get_nested_column().get_raw_data().data; + if (nullmap) { for (size_t i = 0; i < column->size(); i++) { if (!nullmap[i]) { results[i] = bloom_filter.test_element(data[i]); @@ -327,7 +336,6 @@ struct CommonFindOp { } } } else { - const T* data = (T*)column->get_raw_data().data; for (size_t i = 0; i < column->size(); i++) { results[i] = bloom_filter.test_element(data[i]); } @@ -340,8 +348,8 @@ struct CommonFindOp { }; struct StringFindOp : CommonFindOp { - void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, - size_t start) { + static void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, + size_t start) { if (column->is_nullable()) { const auto* nullable = assert_cast(column.get()); const auto& col = @@ -363,8 +371,8 @@ struct StringFindOp : CommonFindOp { } } - void find_batch(const BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, - uint8_t* results) { + static void find_batch(const BloomFilterAdaptor& bloom_filter, + const vectorized::ColumnPtr& column, uint8_t* results) { if (column->is_nullable()) { const auto* nullable = assert_cast(column.get()); const auto& col = @@ -372,12 +380,17 @@ struct StringFindOp : CommonFindOp { const auto& nullmap = assert_cast(nullable->get_null_map_column()) .get_data(); - - for (size_t i = 0; i < column->size(); i++) { - if (!nullmap[i]) { + if (nullable->has_null()) { + for (size_t i = 0; i < column->size(); i++) { + if (!nullmap[i]) { + results[i] = bloom_filter.test_element(col.get_data_at(i)); + } else { + results[i] = false; + } + } + } else { + for (size_t i = 0; i < column->size(); i++) { results[i] = bloom_filter.test_element(col.get_data_at(i)); - } else { - results[i] = false; } } } else { @@ -392,9 +405,9 @@ struct StringFindOp : CommonFindOp { // We do not need to judge whether data is empty, because null will not appear // when filer used by the storage engine struct FixedStringFindOp : public StringFindOp { - uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, - const uint8* nullmap, uint16_t* offsets, int number, - const bool is_parse_column) { + static uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, + const uint8* nullmap, uint16_t* offsets, int number, + const bool is_parse_column) { return find_batch_olap(bloom_filter, data, nullmap, offsets, number, is_parse_column); } diff --git a/be/src/pipeline/exec/exchange_sink_buffer.cpp b/be/src/pipeline/exec/exchange_sink_buffer.cpp index be5d3e6448041f..4484a34375b539 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.cpp +++ b/be/src/pipeline/exec/exchange_sink_buffer.cpp @@ -106,7 +106,7 @@ void ExchangeSinkBuffer::close() { template bool ExchangeSinkBuffer::can_write() const { - size_t max_package_size = 64 * _instance_to_package_queue.size(); + size_t max_package_size = QUEUE_CAPACITY_FACTOR * _instance_to_package_queue.size(); size_t total_package_size = 0; for (auto& [_, q] : _instance_to_package_queue) { total_package_size += q.size(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h index becb06f7cfca64..939396073825c4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_null.h +++ b/be/src/vec/aggregate_functions/aggregate_function_null.h @@ -210,17 +210,29 @@ class AggregateFunctionNullUnaryInline final } } - void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, + void add_batch(size_t batch_size, AggregateDataPtr* __restrict places, size_t place_offset, const IColumn** columns, Arena* arena, bool agg_many) const override { - const ColumnNullable* column = assert_cast(columns[0]); - // The overhead introduced is negligible here, just an extra memory read from NullMap - const auto* __restrict null_map_data = column->get_null_map_data().data(); + const auto* column = assert_cast(columns[0]); const IColumn* nested_column = &column->get_nested_column(); - for (int i = 0; i < batch_size; ++i) { - if (!null_map_data[i]) { - AggregateDataPtr __restrict place = places[i] + place_offset; - this->set_flag(place); - this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + if (column->has_null()) { + const auto* __restrict null_map_data = column->get_null_map_data().data(); + for (int i = 0; i < batch_size; ++i) { + if (!null_map_data[i]) { + AggregateDataPtr __restrict place = places[i] + place_offset; + this->set_flag(place); + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } + } else { + if constexpr (result_is_nullable) { + for (int i = 0; i < batch_size; ++i) { + AggregateDataPtr __restrict place = places[i] + place_offset; + place[0] |= 1; + this->nested_function->add(this->nested_place(place), &nested_column, i, arena); + } + } else { + this->nested_function->add_batch(batch_size, places, place_offset, &nested_column, + arena, agg_many); } } } diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index d67ad3e206b695..23f9073eff5e53 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -603,9 +603,6 @@ class IColumn : public COW { // true if column has null element [0,size) virtual bool has_null(size_t size) const { return false; } - /// It's a special kind of column, that contain single value, but is not a ColumnConst. - virtual bool is_dummy() const { return false; } - virtual bool is_exclusive() const { return use_count() == 1; } /// Clear data of column, just like vector clear @@ -733,7 +730,7 @@ using ColumnPtr = IColumn::Ptr; using MutableColumnPtr = IColumn::MutablePtr; using Columns = std::vector; using MutableColumns = std::vector; - +using ColumnPtrs = std::vector; using ColumnRawPtrs = std::vector; template diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 866a55c447f12a..8d1cbdd69acc1d 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -395,10 +395,17 @@ void ColumnArray::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveTyp } void ColumnArray::insert(const Field& x) { - const Array& array = doris::vectorized::get(x); - size_t size = array.size(); - for (size_t i = 0; i < size; ++i) get_data().insert(array[i]); - get_offsets().push_back(get_offsets().back() + size); + if (x.is_null()) { + get_data().insert(Null()); + get_offsets().push_back(get_offsets().back() + 1); + } else { + const auto& array = doris::vectorized::get(x); + size_t size = array.size(); + for (size_t i = 0; i < size; ++i) { + get_data().insert(array[i]); + } + get_offsets().push_back(get_offsets().back() + size); + } } void ColumnArray::insert_from(const IColumn& src_, size_t n) { diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 8d03087cc3d989..f36f0e81879107 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -73,7 +73,9 @@ class ColumnConst final : public COWHelper { public: ColumnPtr convert_to_full_column() const; - ColumnPtr convert_to_full_column_if_const() const override { return convert_to_full_column(); } + ColumnPtr convert_to_full_column_if_const() const override { + return convert_to_full_column()->convert_to_full_column_if_const(); + } ColumnPtr remove_low_cardinality() const; diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index ed76ce6a113308..dd42b3563a827d 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -79,7 +79,7 @@ template void ColumnDecimal::serialize_vec(std::vector& keys, size_t num_rows, size_t max_row_byte_size) const { for (size_t i = 0; i < num_rows; ++i) { - memcpy(const_cast(keys[i].data + keys[i].size), &data[i], sizeof(T)); + memcpy_fixed(const_cast(keys[i].data + keys[i].size), (char*)&data[i]); keys[i].size += sizeof(T); } } @@ -89,7 +89,7 @@ void ColumnDecimal::serialize_vec_with_null_map(std::vector& keys, const uint8_t* null_map) const { for (size_t i = 0; i < num_rows; ++i) { if (null_map[i] == 0) { - memcpy(const_cast(keys[i].data + keys[i].size), &data[i], sizeof(T)); + memcpy_fixed(const_cast(keys[i].data + keys[i].size), (char*)&data[i]); keys[i].size += sizeof(T); } } diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index f1f7dac52d5889..b51cd8faa659eb 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -150,8 +150,6 @@ class IColumnDummy : public IColumn { void addSize(size_t delta) { s += delta; } - bool is_dummy() const override { return true; } - void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { LOG(FATAL) << "should not call the method in column dummy"; } diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index e46ea7fe683128..53d23df8ae3d71 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -520,4 +520,10 @@ size_t ColumnMap::allocated_bytes() const { get_offsets().allocated_bytes(); } +ColumnPtr ColumnMap::convert_to_full_column_if_const() const { + return ColumnMap::create(keys_column->convert_to_full_column_if_const(), + values_column->convert_to_full_column_if_const(), + offsets_column->convert_to_full_column_if_const()); +} + } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 45cedeb0d94e69..fe1ccfb6f82e9e 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -91,6 +91,8 @@ class ColumnMap final : public COWHelper { offsets_column->clear(); } + ColumnPtr convert_to_full_column_if_const() const override; + MutableColumnPtr clone_resized(size_t size) const override; Field operator[](size_t n) const override; diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index ecf330bead3ca6..426de2d4f70eee 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -257,15 +257,22 @@ size_t ColumnNullable::get_max_row_byte_size() const { void ColumnNullable::serialize_vec(std::vector& keys, size_t num_rows, size_t max_row_byte_size) const { - const auto& arr = get_null_map_data(); - static constexpr auto s = sizeof(arr[0]); - for (size_t i = 0; i < num_rows; ++i) { - auto* val = const_cast(keys[i].data + keys[i].size); - *val = (arr[i] ? 1 : 0); - keys[i].size += s; + if (has_null()) { + const auto& arr = get_null_map_data(); + for (size_t i = 0; i < num_rows; ++i) { + auto* val = const_cast(keys[i].data + keys[i].size); + *val = (arr[i] ? 1 : 0); + keys[i].size++; + } + get_nested_column().serialize_vec_with_null_map(keys, num_rows, arr.data()); + } else { + for (size_t i = 0; i < num_rows; ++i) { + auto* val = const_cast(keys[i].data + keys[i].size); + *val = 0; + keys[i].size++; + } + get_nested_column().serialize_vec(keys, num_rows, max_row_byte_size); } - - get_nested_column().serialize_vec_with_null_map(keys, num_rows, arr.data()); } void ColumnNullable::deserialize_vec(std::vector& keys, const size_t num_rows) { @@ -282,7 +289,11 @@ void ColumnNullable::deserialize_vec(std::vector& keys, const size_t keys[i].data += sizeof(val); keys[i].size -= sizeof(val); } - get_nested_column().deserialize_vec_with_null_map(keys, num_rows, arr.data()); + if (_has_null) { + get_nested_column().deserialize_vec_with_null_map(keys, num_rows, arr.data()); + } else { + get_nested_column().deserialize_vec(keys, num_rows); + } } void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) { diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 10b0951ab8b96a..83cbe82e328fd9 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -274,7 +274,8 @@ class ColumnNullable final : public COWHelper { size_t size_of_value_if_fixed() const override { return null_map->size_of_value_if_fixed() + nested_column->size_of_value_if_fixed(); } - bool only_null() const override { return nested_column->is_dummy(); } + + bool only_null() const override { return size() == 1 && is_null_at(0); } // used in schema change void change_nested_column(ColumnPtr& other) { ((ColumnPtr&)nested_column) = other; } diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 77366d05cb58b2..aff38c56a80bb0 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -488,7 +488,7 @@ void ColumnObject::Subcolumn::finalize() { throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() + ", real_code:{}", st.code()); } - part = ptr; + part = ptr->convert_to_full_column_if_const(); } result_column->insert_range_from(*part, 0, part_size); } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 424a8717e1498c..337f5e5663a242 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -313,7 +313,7 @@ void ColumnString::serialize_vec(std::vector& keys, size_t num_rows, uint32_t string_size(size_at(i)); auto* ptr = const_cast(keys[i].data + keys[i].size); - memcpy(ptr, &string_size, sizeof(string_size)); + memcpy_fixed(ptr, (char*)&string_size); memcpy(ptr + sizeof(string_size), &chars[offset], string_size); keys[i].size += sizeof(string_size) + string_size; } @@ -327,7 +327,7 @@ void ColumnString::serialize_vec_with_null_map(std::vector& keys, siz uint32_t string_size(size_at(i)); auto* ptr = const_cast(keys[i].data + keys[i].size); - memcpy(ptr, &string_size, sizeof(string_size)); + memcpy_fixed(ptr, (char*)&string_size); memcpy(ptr + sizeof(string_size), &chars[offset], string_size); keys[i].size += sizeof(string_size) + string_size; } diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h index 0da222d37216dc..3784379902bfdf 100644 --- a/be/src/vec/common/hash_table/hash_map_context.h +++ b/be/src/vec/common/hash_table/hash_map_context.h @@ -377,12 +377,14 @@ struct MethodKeysFixed : public MethodBase { assert_cast(*nullmap_columns[j]).get_data().data(); for (size_t i = 0; i < row_numbers; ++i) { // make sure null cell is filled by 0x0 - memcpy_fixed((char*)(&result[i]) + offset, - nullmap[i] ? (char*)&zero : data + i * sizeof(Fixed)); + memcpy_fixed( + (char*)(&result[i]) + offset, + nullmap[i] ? (char*)&zero : data + i * sizeof(Fixed)); } } else { for (size_t i = 0; i < row_numbers; ++i) { - memcpy_fixed((char*)(&result[i]) + offset, data + i * sizeof(Fixed)); + memcpy_fixed((char*)(&result[i]) + offset, + data + i * sizeof(Fixed)); } } }; @@ -474,7 +476,8 @@ struct MethodKeysFixed : public MethodBase { auto foo = [&](Fixed zero) { CHECK_EQ(sizeof(Fixed), size); for (size_t j = 0; j < num_rows; j++) { - memcpy_fixed(data + j * sizeof(Fixed), (char*)(&input_keys[j]) + pos); + memcpy_fixed(data + j * sizeof(Fixed), + (char*)(&input_keys[j]) + pos); } }; diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h index af5d0e6074d8bb..473900663186e6 100644 --- a/be/src/vec/common/memcpy_small.h +++ b/be/src/vec/common/memcpy_small.h @@ -82,7 +82,12 @@ inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, #endif -template +// assume input address not aligned by default +template void memcpy_fixed(char* lhs, const char* rhs) { - *(T*)lhs = *(T*)rhs; + if constexpr (aligned || sizeof(T) <= 8) { + *(T*)lhs = *(T*)rhs; + } else { + memcpy(lhs, rhs, sizeof(T)); + } } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 1290ddb237f09e..0ceddf25b3feee 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -150,9 +150,6 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co type->get_name()); } Block tmp_block {arguments}; - vectorized::ColumnNumbers argnum; - argnum.emplace_back(0); - argnum.emplace_back(1); size_t result_column = tmp_block.columns(); auto ctx = FunctionContext::create_context(nullptr, {}, {}); // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse @@ -160,32 +157,8 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co ctx->set_string_as_jsonb_string(true); tmp_block.insert({nullptr, type, arg.name}); RETURN_IF_ERROR( - function->execute(ctx.get(), tmp_block, argnum, result_column, arg.column->size())); - *result = std::move(tmp_block.get_by_position(result_column).column); - // Variant column is a really special case, src type is nullable but dst variant type is none nullable, - // but we still need to wrap nullmap into variant root column to prevent from nullable info lost. - // TODO rethink and better handle this sepecial situation - if (arg.type->is_nullable() && WhichDataType(remove_nullable(type)).is_variant_type()) { - auto variant = ColumnObject::create(true); - auto& old_variant = - (*result)->is_nullable() - ? assert_cast( - assert_cast(**result).get_nested_column()) - : assert_cast(*(*result)->assume_mutable()); - DCHECK(!old_variant.get_root()->is_nullable()); - auto nullable = ColumnNullable::create( - old_variant.get_root(), - assert_cast(*arg.column).get_null_map_column_ptr()); - variant->create_root(make_nullable(arg.type), nullable->assume_mutable()); - if ((*result)->is_nullable()) { - *result = ColumnNullable::create(std::move(variant), - assert_cast(*arg.column) - .get_null_map_column_ptr() - ->clone_resized(nullable->size())); - } else { - *result = std::move(variant); - } - } + function->execute(ctx.get(), tmp_block, {0}, result_column, arg.column->size())); + *result = tmp_block.get_by_position(result_column).column->convert_to_full_column_if_const(); return Status::OK(); } @@ -422,6 +395,7 @@ Status parse_variant_columns(Block& block, const std::vector& variant_pos) bool is_nullable = column_ref->is_nullable(); const auto& column = remove_nullable(column_ref); const auto& var = assert_cast(*column.get()); + var.assume_mutable_ref().finalize(); if (!var.is_scalar_variant()) { // already parsed continue; @@ -431,12 +405,19 @@ Status parse_variant_columns(Block& block, const std::vector& variant_pos) // TODO more efficient way to parse jsonb type, currently we just convert jsonb to // json str and parse them into variant RETURN_IF_ERROR(cast_column({var.get_root(), var.get_root_type(), ""}, - std::make_shared(), &raw_json_column)); + var.get_root()->is_nullable() + ? make_nullable(std::make_shared()) + : std::make_shared(), + &raw_json_column)); + if (raw_json_column->is_nullable()) { + raw_json_column = assert_cast(raw_json_column.get()) + ->get_nested_column_ptr(); + } } else { const auto& root = *var.get_root(); raw_json_column = root.is_nullable() - ? static_cast(root).get_nested_column_ptr() + ? assert_cast(root).get_nested_column_ptr() : var.get_root(); } diff --git a/be/src/vec/common/sort/partition_sorter.cpp b/be/src/vec/common/sort/partition_sorter.cpp index 083c676ba8c1ff..a3954aa3f55c67 100644 --- a/be/src/vec/common/sort/partition_sorter.cpp +++ b/be/src/vec/common/sort/partition_sorter.cpp @@ -66,7 +66,7 @@ Status PartitionSorter::prepare_for_read() { auto& cursors = _state->get_cursors(); auto& blocks = _state->get_sorted_block(); auto& priority_queue = _state->get_priority_queue(); - for (const auto& block : blocks) { + for (auto& block : blocks) { cursors.emplace_back(block, _sort_description); } for (auto& cursor : cursors) { diff --git a/be/src/vec/common/sort/sorter.cpp b/be/src/vec/common/sort/sorter.cpp index bd095e1afc561c..644db862f063e6 100644 --- a/be/src/vec/common/sort/sorter.cpp +++ b/be/src/vec/common/sort/sorter.cpp @@ -99,12 +99,14 @@ Status MergeSorterState::add_sorted_block(Block& block) { } void MergeSorterState::_build_merge_tree_not_spilled(const SortDescription& sort_description) { - for (const auto& block : sorted_blocks_) { + for (auto& block : sorted_blocks_) { cursors_.emplace_back(block, sort_description); } if (sorted_blocks_.size() > 1) { - for (auto& cursor : cursors_) priority_queue_.push(MergeSortCursor(&cursor)); + for (auto& cursor : cursors_) { + priority_queue_.emplace(&cursor); + } } } diff --git a/be/src/vec/common/sort/sorter.h b/be/src/vec/common/sort/sorter.h index e372e6367d8643..e0a2b92ceed5a3 100644 --- a/be/src/vec/common/sort/sorter.h +++ b/be/src/vec/common/sort/sorter.h @@ -88,7 +88,7 @@ class MergeSorterState { bool is_spilled() const { return is_spilled_; } - const Block& last_sorted_block() const { return sorted_blocks_.back(); } + Block& last_sorted_block() { return sorted_blocks_.back(); } std::vector& get_sorted_block() { return sorted_blocks_; } std::priority_queue& get_priority_queue() { return priority_queue_; } diff --git a/be/src/vec/common/typeid_cast.h b/be/src/vec/common/typeid_cast.h index fefd38409fadfe..85f99b492cdeb9 100644 --- a/be/src/vec/common/typeid_cast.h +++ b/be/src/vec/common/typeid_cast.h @@ -59,13 +59,18 @@ To typeid_cast(From& from) { template To typeid_cast(From* from) { +#ifndef NDEBUG try { if (typeid(*from) == typeid(std::remove_pointer_t)) { return static_cast(from); - } else { - return nullptr; } } catch (const std::exception& e) { throw doris::Exception(doris::ErrorCode::BAD_CAST, e.what()); } +#else + if (typeid(*from) == typeid(std::remove_pointer_t)) { + return static_cast(from); + } +#endif + return nullptr; } diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 80a10c528ebb14..e3ce4885cb08af 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -192,18 +192,6 @@ void Block::insert(ColumnWithTypeAndName&& elem) { data.emplace_back(std::move(elem)); } -void Block::insert_unique(const ColumnWithTypeAndName& elem) { - if (index_by_name.end() == index_by_name.find(elem.name)) { - insert(elem); - } -} - -void Block::insert_unique(ColumnWithTypeAndName&& elem) { - if (index_by_name.end() == index_by_name.find(elem.name)) { - insert(std::move(elem)); - } -} - void Block::erase(const std::set& positions) { for (unsigned long position : std::ranges::reverse_view(positions)) { erase(position); @@ -415,9 +403,9 @@ size_t Block::bytes() const { for (const auto& e : data) { ss << e.name + " "; } - LOG(FATAL) << fmt::format( - "Column {} in block is nullptr, in method bytes. All Columns are {}", elem.name, - ss.str()); + throw Exception(ErrorCode::INTERNAL_ERROR, + "Column {} in block is nullptr, in method bytes. All Columns are {}", + elem.name, ss.str()); } res += elem.column->byte_size(); } @@ -433,9 +421,9 @@ size_t Block::allocated_bytes() const { for (const auto& e : data) { ss << e.name + " "; } - LOG(FATAL) << fmt::format( - "Column {} in block is nullptr, in method allocated_bytes. All Columns are {}", - elem.name, ss.str()); + throw Exception(ErrorCode::INTERNAL_ERROR, + "Column {} in block is nullptr, in method bytes. All Columns are {}", + elem.name, ss.str()); } res += elem.column->allocated_bytes(); } @@ -568,6 +556,16 @@ Columns Block::get_columns() const { size_t num_columns = data.size(); Columns columns(num_columns); for (size_t i = 0; i < num_columns; ++i) { + columns[i] = data[i].column->convert_to_full_column_if_const(); + } + return columns; +} + +Columns Block::get_columns_and_convert() { + size_t num_columns = data.size(); + Columns columns(num_columns); + for (size_t i = 0; i < num_columns; ++i) { + data[i].column = data[i].column->convert_to_full_column_if_const(); columns[i] = data[i].column; } return columns; diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 8433ebf074cbb7..ec9e3835222f4c 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -105,9 +105,6 @@ class Block { /// insert the column to the end void insert(const ColumnWithTypeAndName& elem); void insert(ColumnWithTypeAndName&& elem); - /// insert the column to the end, if there is no column with that name yet - void insert_unique(const ColumnWithTypeAndName& elem); - void insert_unique(ColumnWithTypeAndName&& elem); /// remove the column at the specified position void erase(size_t position); /// remove the column at the [start, end) @@ -214,6 +211,8 @@ class Block { Block clone_empty() const; Columns get_columns() const; + Columns get_columns_and_convert(); + void set_columns(const Columns& columns); Block clone_with_columns(const Columns& columns) const; Block clone_without_columns() const; diff --git a/be/src/vec/core/column_with_type_and_name.cpp b/be/src/vec/core/column_with_type_and_name.cpp index 9ac2bbe6e4476b..cd0f7194004073 100644 --- a/be/src/vec/core/column_with_type_and_name.cpp +++ b/be/src/vec/core/column_with_type_and_name.cpp @@ -30,6 +30,7 @@ #include "vec/columns/column.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { @@ -87,4 +88,28 @@ void ColumnWithTypeAndName::to_pb_column_meta(PColumnMeta* col_meta) const { type->to_pb_column_meta(col_meta); } +ColumnWithTypeAndName ColumnWithTypeAndName::get_nested(bool replace_null_data_to_default) const { + if (type->is_nullable()) { + auto nested_type = assert_cast(type.get())->get_nested_type(); + ColumnPtr nested_column = column; + if (column) { + nested_column = nested_column->convert_to_full_column_if_const(); + const auto* source_column = assert_cast(nested_column.get()); + nested_column = source_column->get_nested_column_ptr(); + + if (replace_null_data_to_default) { + const auto& null_map = source_column->get_null_map_data(); + // only need to mutate nested column, avoid to copy nullmap + auto mutable_nested_col = (*std::move(nested_column)).mutate(); + mutable_nested_col->replace_column_null_data(null_map.data()); + + return {std::move(mutable_nested_col), nested_type, ""}; + } + } + return {nested_column, nested_type, ""}; + } else { + return {column, type, ""}; + } +} + } // namespace doris::vectorized diff --git a/be/src/vec/core/column_with_type_and_name.h b/be/src/vec/core/column_with_type_and_name.h index caf68f46260db1..53ca6f20b2dd6d 100644 --- a/be/src/vec/core/column_with_type_and_name.h +++ b/be/src/vec/core/column_with_type_and_name.h @@ -25,6 +25,7 @@ #include #include #include +#include #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -47,13 +48,13 @@ struct ColumnWithTypeAndName { DataTypePtr type; String name; - ColumnWithTypeAndName() {} - ColumnWithTypeAndName(const ColumnPtr& column_, const DataTypePtr& type_, const String& name_) - : column(column_), type(type_), name(name_) {} + ColumnWithTypeAndName() = default; + ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_) + : column(std::move(column_)), type(std::move(type_)), name(std::move(name_)) {} /// Uses type->create_column() to create column - ColumnWithTypeAndName(const DataTypePtr& type_, const String& name_) - : column(type_->create_column()), type(type_), name(name_) {} + ColumnWithTypeAndName(const DataTypePtr& type_, String name_) + : column(type_->create_column()), type(type_), name(std::move(name_)) {} ColumnWithTypeAndName clone_empty() const; bool operator==(const ColumnWithTypeAndName& other) const; @@ -63,6 +64,8 @@ struct ColumnWithTypeAndName { std::string to_string(size_t row_num) const; void to_pb_column_meta(PColumnMeta* col_meta) const; + + ColumnWithTypeAndName get_nested(bool replace_null_data_to_default = false) const; }; } // namespace doris::vectorized diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index b43449b50b0472..9e40f40cf18c45 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -47,7 +47,7 @@ struct HeapSortCursorBlockView { private: void _reset() { sort_columns.clear(); - auto columns = block.get_columns(); + auto columns = block.get_columns_and_convert(); for (size_t j = 0, size = desc.size(); j < size; ++j) { auto& column_desc = desc[j]; size_t column_number = !column_desc.column_name.empty() @@ -161,7 +161,7 @@ struct MergeSortCursorImpl { MergeSortCursorImpl() = default; virtual ~MergeSortCursorImpl() = default; - MergeSortCursorImpl(const Block& block, const SortDescription& desc_) + MergeSortCursorImpl(Block& block, const SortDescription& desc_) : desc(desc_), sort_columns_size(desc.size()) { reset(block); } @@ -171,13 +171,11 @@ struct MergeSortCursorImpl { bool empty() const { return rows == 0; } /// Set the cursor to the beginning of the new block. - void reset(const Block& block) { reset(block.get_columns(), block); } - - /// Set the cursor to the beginning of the new block. - void reset(const Columns& columns, const Block& block) { + void reset(Block& block) { all_columns.clear(); sort_columns.clear(); + auto columns = block.get_columns_and_convert(); size_t num_columns = columns.size(); for (size_t j = 0; j < num_columns; ++j) { diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 7807f2a5d587fb..8fa41f88886a18 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -199,10 +199,6 @@ class IDataType : private boost::noncopyable { virtual bool is_nullable() const { return false; } - /** Is this type can represent only NULL value? (It also implies is_nullable) - */ - virtual bool only_null() const { return false; } - /* the data type create from type_null, NULL literal*/ virtual bool is_null_literal() const { return false; } diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 1e4ecb1cf29f43..f160a1f323a8ce 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -49,10 +49,6 @@ DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_) } } -bool DataTypeNullable::only_null() const { - return typeid_cast(nested_data_type.get()); -} - std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h index fc958096adfe94..12410b70bd1322 100644 --- a/be/src/vec/data_types/data_type_nullable.h +++ b/be/src/vec/data_types/data_type_nullable.h @@ -109,7 +109,6 @@ class DataTypeNullable final : public IDataType { } bool is_nullable() const override { return true; } size_t get_size_of_value_in_memory() const override; - bool only_null() const override; bool can_be_inside_low_cardinality() const override { return nested_data_type->can_be_inside_low_cardinality(); } diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index 49c5bacce10f77..8d5662b9bed801 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -291,9 +291,7 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type) { typeid_cast(type.get())) { have_nullable = true; - if (!type_nullable->only_null()) { - nested_types.emplace_back(type_nullable->get_nested_type()); - } + nested_types.emplace_back(type_nullable->get_nested_type()); } else { nested_types.emplace_back(type); } diff --git a/be/src/vec/exec/vjdbc_connector.cpp b/be/src/vec/exec/vjdbc_connector.cpp index 56205d2ffa9b7a..c80d84f98be8b6 100644 --- a/be/src/vec/exec/vjdbc_connector.cpp +++ b/be/src/vec/exec/vjdbc_connector.cpp @@ -81,7 +81,7 @@ Status JdbcConnector::close(Status /*unused*/) { return Status::OK(); } if (_is_in_transaction) { - static_cast(abort_trans()); + RETURN_IF_ERROR(abort_trans()); } JNIEnv* env; RETURN_IF_ERROR(JniUtil::GetJNIEnv(&env)); @@ -117,7 +117,7 @@ Status JdbcConnector::open(RuntimeState* state, bool read) { { std::string local_location; std::hash hash_str; - auto function_cache = UserFunctionCache::instance(); + auto* function_cache = UserFunctionCache::instance(); if (_conn_param.resource_name.empty()) { // for jdbcExternalTable, _conn_param.resource_name == "" // so, we use _conn_param.driver_path as key of jarpath @@ -164,7 +164,7 @@ Status JdbcConnector::open(RuntimeState* state, bool read) { RETURN_ERROR_IF_EXC(env); RETURN_IF_ERROR(JniUtil::LocalToGlobalRef(env, _executor_obj, &_executor_obj)); _is_open = true; - static_cast(begin_trans()); + RETURN_IF_ERROR(begin_trans()); return Status::OK(); } @@ -622,11 +622,11 @@ Status JdbcConnector::_cast_string_to_special(Block* block, JNIEnv* env, size_t RETURN_IF_ERROR(JniUtil::GetJniExceptionMsg(env)); if (slot_desc->type().is_hll_type()) { - static_cast(_cast_string_to_hll(slot_desc, block, column_index, num_rows)); + RETURN_IF_ERROR(_cast_string_to_hll(slot_desc, block, column_index, num_rows)); } else if (slot_desc->type().is_json_type()) { - static_cast(_cast_string_to_json(slot_desc, block, column_index, num_rows)); + RETURN_IF_ERROR(_cast_string_to_json(slot_desc, block, column_index, num_rows)); } else if (slot_desc->type().is_bitmap_type()) { - static_cast(_cast_string_to_bitmap(slot_desc, block, column_index, num_rows)); + RETURN_IF_ERROR(_cast_string_to_bitmap(slot_desc, block, column_index, num_rows)); } } return Status::OK(); @@ -654,7 +654,7 @@ Status JdbcConnector::_cast_string_to_hll(const SlotDescriptor* slot_desc, Block Block cast_block(argument_template); int result_idx = cast_block.columns(); cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - static_cast(func_cast->execute(nullptr, cast_block, {0, 1}, result_idx, rows)); + RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); auto res_col = cast_block.get_by_position(result_idx).column; block->get_by_position(column_index).type = _target_data_type; @@ -691,7 +691,7 @@ Status JdbcConnector::_cast_string_to_bitmap(const SlotDescriptor* slot_desc, Bl Block cast_block(argument_template); int result_idx = cast_block.columns(); cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - static_cast(func_cast->execute(nullptr, cast_block, {0, 1}, result_idx, rows)); + RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); auto res_col = cast_block.get_by_position(result_idx).column; block->get_by_position(column_index).type = _target_data_type; @@ -728,7 +728,7 @@ Status JdbcConnector::_cast_string_to_json(const SlotDescriptor* slot_desc, Bloc Block cast_block(argument_template); int result_idx = cast_block.columns(); cast_block.insert({nullptr, make_nullable(_target_data_type), "cast_result"}); - static_cast(func_cast->execute(nullptr, cast_block, {0, 1}, result_idx, rows)); + RETURN_IF_ERROR(func_cast->execute(nullptr, cast_block, {0}, result_idx, rows)); auto res_col = cast_block.get_by_position(result_idx).column; block->get_by_position(column_index).type = _target_data_type; diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 47733a177db238..3207ba5b5419a6 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -106,9 +106,6 @@ doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block int column_id = 0; RETURN_IF_ERROR(_children[0]->execute(context, block, &column_id)); - size_t const_param_id = VExpr::insert_param( - block, {_cast_param, _cast_param_data_type, _target_data_type_name}, block->rows()); - // call function size_t num_columns_without_result = block->columns(); // prepare a column to save result @@ -117,8 +114,8 @@ doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block auto state = Status::OK(); try { state = _function->execute(context->fn_context(_fn_context_index), *block, - {static_cast(column_id), const_param_id}, - num_columns_without_result, block->rows(), false); + {static_cast(column_id)}, num_columns_without_result, + block->rows(), false); *result_column_id = num_columns_without_result; } catch (const Exception& e) { state = e.to_status(); diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 6e7f6572ab8667..8df03496c11fbe 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -48,44 +48,36 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum ColumnPtr src_not_nullable = src; MutableColumnPtr mutable_result_null_map_column; - if (auto* nullable = check_and_get_column(*src)) { + if (const auto* nullable = check_and_get_column(*src)) { src_not_nullable = nullable->get_nested_column_ptr(); result_null_map_column = nullable->get_null_map_column_ptr(); } for (const auto& arg : args) { const ColumnWithTypeAndName& elem = block.get_by_position(arg); - if (!elem.type->is_nullable()) { + if (!elem.type->is_nullable() || is_column_const(*elem.column)) { continue; } - bool is_const = is_column_const(*elem.column); - /// Const Nullable that are NULL. - if (is_const && assert_cast(elem.column.get())->only_null()) { - return block.get_by_position(result).type->create_column_const(input_rows_count, - Null()); - } - if (is_const) { - continue; - } - - if (auto* nullable = assert_cast(elem.column.get())) { + if (const auto* nullable = assert_cast(elem.column.get()); + nullable->has_null()) { const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); if (!result_null_map_column) { result_null_map_column = null_map_column->clone_resized(input_rows_count); - } else { - if (!mutable_result_null_map_column) { - mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); - } - - NullMap& result_null_map = - assert_cast(*mutable_result_null_map_column).get_data(); - const NullMap& src_null_map = - assert_cast(*null_map_column).get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); + continue; } + + if (!mutable_result_null_map_column) { + mutable_result_null_map_column = + std::move(result_null_map_column)->assume_mutable(); + } + + NullMap& result_null_map = + assert_cast(*mutable_result_null_map_column).get_data(); + const NullMap& src_null_map = + assert_cast(*null_map_column).get_data(); + + VectorizedUtils::update_null_map(result_null_map, src_null_map); } } @@ -99,45 +91,22 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0)); } - return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(), - result_null_map_column); + return ColumnNullable::create(src_not_nullable, result_null_map_column); } -NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) { - NullPresence res; - - for (const auto& arg : args) { - const auto& elem = block.get_by_position(arg); - - if (!res.has_nullable) { - res.has_nullable = elem.type->is_nullable(); - } - if (!res.has_null_constant) { - res.has_null_constant = elem.type->only_null(); - } - } - - return res; +bool get_null_presence(const Block& block, const ColumnNumbers& args) { + return std::ranges::any_of(args, [&block](const auto& elem) { + return block.get_by_position(elem).type->is_nullable(); + }); } -[[maybe_unused]] NullPresence get_null_presence(const ColumnsWithTypeAndName& args) { - NullPresence res; - - for (const auto& elem : args) { - if (!res.has_nullable) { - res.has_nullable = elem.type->is_nullable(); - } - if (!res.has_null_constant) { - res.has_null_constant = elem.type->only_null(); - } - } - - return res; +bool get_null_presence(const ColumnsWithTypeAndName& args) { + return std::ranges::any_of(args, [](const auto& elem) { return elem.type->is_nullable(); }); } inline Status PreparedFunctionImpl::_execute_skipped_constant_deal( FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run) { + size_t input_rows_count, bool dry_run) const { bool executed = false; RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result, input_rows_count, dry_run, &executed)); @@ -154,7 +123,7 @@ inline Status PreparedFunctionImpl::_execute_skipped_constant_deal( Status PreparedFunctionImpl::default_implementation_for_constant_arguments( FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run, bool* executed) { + size_t input_rows_count, bool dry_run, bool* executed) const { *executed = false; ColumnNumbers args_expect_const = get_arguments_that_are_always_constant(); @@ -218,45 +187,51 @@ Status PreparedFunctionImpl::default_implementation_for_constant_arguments( Status PreparedFunctionImpl::default_implementation_for_nulls( FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run, bool* executed) { + size_t input_rows_count, bool dry_run, bool* executed) const { *executed = false; if (args.empty() || !use_default_implementation_for_nulls()) { return Status::OK(); } - NullPresence null_presence = get_null_presence(block, args); - - if (null_presence.has_null_constant) { + if (std::ranges::any_of(args, [&block](const auto& elem) { + return block.get_by_position(elem).column->only_null(); + })) { block.get_by_position(result).column = block.get_by_position(result).type->create_column_const(input_rows_count, Null()); *executed = true; return Status::OK(); } - if (null_presence.has_nullable) { - bool check_overflow_for_decimal = false; + if (get_null_presence(block, args)) { + bool need_to_default = need_replace_null_data_to_default(); if (context) { - check_overflow_for_decimal = context->check_overflow_for_decimal(); + need_to_default &= context->check_overflow_for_decimal(); + } + ColumnNumbers new_args; + for (auto arg : args) { + new_args.push_back(block.columns()); + block.insert(block.get_by_position(arg).get_nested(need_to_default)); + DCHECK(!block.get_by_position(new_args.back()).column->is_nullable()); } - auto [temporary_block, new_args, new_result] = create_block_with_nested_columns( - block, args, result, - check_overflow_for_decimal && need_replace_null_data_to_default()); - RETURN_IF_ERROR(execute_without_low_cardinality_columns( - context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run)); - block.get_by_position(result).column = - wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args, - result, input_rows_count); + RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, + block.rows(), dry_run)); + block.get_by_position(result).column = wrap_in_nullable( + block.get_by_position(result).column, block, args, result, input_rows_count); + + while (!new_args.empty()) { + block.erase(new_args.back()); + new_args.pop_back(); + } *executed = true; return Status::OK(); } - *executed = false; return Status::OK(); } Status PreparedFunctionImpl::execute_without_low_cardinality_columns( FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run) { + size_t input_rows_count, bool dry_run) const { bool executed = false; RETURN_IF_ERROR(default_implementation_for_constant_arguments( @@ -270,7 +245,7 @@ Status PreparedFunctionImpl::execute_without_low_cardinality_columns( Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run) { + size_t input_rows_count, bool dry_run) const { return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count, dry_run); } @@ -292,12 +267,7 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality( check_number_of_arguments(arguments.size()); if (!arguments.empty() && use_default_implementation_for_nulls()) { - NullPresence null_presence = get_null_presence(arguments); - - if (null_presence.has_null_constant) { - return make_nullable(std::make_shared()); - } - if (null_presence.has_nullable) { + if (get_null_presence(arguments)) { ColumnNumbers numbers(arguments.size()); std::iota(numbers.begin(), numbers.end(), 0); auto [nested_block, _] = diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 1b4c9fe128c32a..0ca2899d7481d5 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -60,18 +60,13 @@ template auto has_variadic_argument_types(T&& arg) -> decltype(T::get_variadic_argument_types()) {}; void has_variadic_argument_types(...); -struct NullPresence { - bool has_nullable = false; - bool has_null_constant = false; -}; - template concept HasGetVariadicArgumentTypesImpl = requires(T t) { { t.get_variadic_argument_types_impl() } -> std::same_as; }; -NullPresence get_null_presence(const Block& block, const ColumnNumbers& args); -[[maybe_unused]] NullPresence get_null_presence(const ColumnsWithTypeAndName& args); +bool get_null_presence(const Block& block, const ColumnNumbers& args); +bool get_null_presence(const ColumnsWithTypeAndName& args); /// The simplest executable object. /// Motivation: @@ -85,7 +80,7 @@ class IPreparedFunction { virtual String get_name() const = 0; virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run) = 0; + size_t result, size_t input_rows_count, bool dry_run) const = 0; }; using PreparedFunctionPtr = std::shared_ptr; @@ -93,7 +88,7 @@ using PreparedFunctionPtr = std::shared_ptr; class PreparedFunctionImpl : public IPreparedFunction { public: Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) final; + size_t result, size_t input_rows_count, bool dry_run = false) const final; /** If the function have non-zero number of arguments, * and if all arguments are constant, that we could automatically provide default implementation: @@ -111,7 +106,7 @@ class PreparedFunctionImpl : public IPreparedFunction { protected: virtual Status execute_impl_dry_run(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { + size_t input_rows_count) const { return execute_impl(context, block, arguments, result, input_rows_count); } @@ -141,17 +136,18 @@ class PreparedFunctionImpl : public IPreparedFunction { private: Status default_implementation_for_nulls(FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run, bool* executed); + size_t input_rows_count, bool dry_run, + bool* executed) const; Status default_implementation_for_constant_arguments(FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, size_t input_rows_count, bool dry_run, - bool* executed); + bool* executed) const; Status execute_without_low_cardinality_columns(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count, bool dry_run); + size_t input_rows_count, bool dry_run) const; Status _execute_skipped_constant_deal(FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result, - size_t input_rows_count, bool dry_run); + size_t input_rows_count, bool dry_run) const; }; /// Function with known arguments and return type. @@ -178,7 +174,7 @@ class IFunctionBase { /// TODO: make const virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) { + size_t result, size_t input_rows_count, bool dry_run = false) const { return prepare(context, block, arguments, result) ->execute(context, block, arguments, result, input_rows_count, dry_run); } @@ -511,7 +507,7 @@ class DefaultExecutable final : public PreparedFunctionImpl { } Status execute_impl_dry_run(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, - size_t input_rows_count) final { + size_t input_rows_count) const final { return function->execute_impl_dry_run(context, block, arguments, result, input_rows_count); } bool use_default_implementation_for_nulls() const final { diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index ac80542f63349f..08b15098755dd7 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -700,12 +700,7 @@ Status execute_bitmap_op_count_null_to_zero( size_t input_rows_count, const std::function& exec_impl_func) { - NullPresence null_presence = get_null_presence(block, arguments); - - if (null_presence.has_null_constant) { - block.get_by_position(result).column = - block.get_by_position(result).type->create_column_const(input_rows_count, 0); - } else if (null_presence.has_nullable) { + if (get_null_presence(block, arguments)) { auto [temporary_block, new_args, new_result] = create_block_with_nested_columns(block, arguments, result); RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result, diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 57678b02b6880c..82e4b3d972bfa4 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -94,6 +94,7 @@ #include "vec/functions/function_helpers.h" #include "vec/io/reader_buffer.h" #include "vec/runtime/vdatetime_value.h" +#include "vec/utils/util.hpp" class DateLUTImpl; @@ -1454,12 +1455,7 @@ class PreparedFunctionCast : public PreparedFunctionImpl { protected: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { - /// drop second argument, pass others - ColumnNumbers new_arguments {arguments.front()}; - if (arguments.size() > 2) - new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), - std::end(arguments)); - return wrapper_function(context, block, new_arguments, result, input_rows_count); + return wrapper_function(context, block, arguments, result, input_rows_count); } bool use_default_implementation_for_nulls() const override { return false; } @@ -1547,7 +1543,7 @@ struct ConvertThroughParsing { res == StringParser::PARSE_OVERFLOW || res == StringParser::PARSE_UNDERFLOW); } else if constexpr (IsDataTypeDateTimeV2) { - auto type = check_and_get_data_type( + const auto* type = assert_cast( block.get_by_position(result).type.get()); parsed = try_parse_impl(vec_to[i], read_buffer, context->state()->timezone_obj(), @@ -2251,7 +2247,7 @@ class FunctionCast final : public IFunctionBase { const auto& from_nested = from_type; const auto& to_nested = to_type; - if (from_type->only_null() || from_type->is_null_literal()) { + if (from_type->is_null_literal()) { if (!to_nested->is_nullable()) { return create_unsupport_wrapper("Cannot convert NULL to a non-nullable type"); } @@ -2276,82 +2272,32 @@ class FunctionCast final : public IFunctionBase { const DataTypePtr& to_type, bool skip_not_null_check) const { /// Determine whether pre-processing and/or post-processing must take place during conversion. - bool source_is_nullable = from_type->is_nullable(); bool result_is_nullable = to_type->is_nullable(); - auto wrapper = prepare_impl(context, remove_nullable(from_type), remove_nullable(to_type), - result_is_nullable); - if (result_is_nullable) { - return [wrapper, source_is_nullable](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { - /// Create a temporary block on which to perform the operation. - auto& res = block.get_by_position(result); - const auto& ret_type = res.type; - const auto& nullable_type = static_cast(*ret_type); - const auto& nested_type = nullable_type.get_nested_type(); - - Block tmp_block; - size_t tmp_res_index = 0; - if (source_is_nullable) { - auto [t_block, tmp_args] = - create_block_with_nested_columns(block, arguments, true); - tmp_block = std::move(t_block); - tmp_res_index = tmp_block.columns(); - tmp_block.insert({nullptr, nested_type, ""}); - - /// Perform the requested conversion. - RETURN_IF_ERROR( - wrapper(context, tmp_block, {0}, tmp_res_index, input_rows_count)); - } else { - tmp_block = block; - - tmp_res_index = block.columns(); - tmp_block.insert({nullptr, nested_type, ""}); - - /// Perform the requested conversion. - RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, tmp_res_index, - input_rows_count)); - } - - // Note: here we should return the nullable result column - const auto& tmp_res = tmp_block.get_by_position(tmp_res_index); - res.column = wrap_in_nullable(tmp_res.column, - Block({block.get_by_position(arguments[0]), tmp_res}), - {0}, 1, input_rows_count); - - return Status::OK(); - }; - } else if (source_is_nullable) { - /// Conversion from Nullable to non-Nullable. - - return [wrapper, skip_not_null_check](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, - const size_t result, size_t input_rows_count) { - auto [tmp_block, tmp_args, tmp_res] = - create_block_with_nested_columns(block, arguments, result); - - /// Check that all values are not-NULL. - /// Check can be skipped in case if LowCardinality dictionary is transformed. - /// In that case, correctness will be checked beforehand. - if (!skip_not_null_check) { - const auto& col = block.get_by_position(arguments[0]).column; - const auto& nullable_col = assert_cast(*col); - const auto& null_map = nullable_col.get_null_map_data(); - - if (!memory_is_zero(null_map.data(), null_map.size())) { - return Status::RuntimeError( - "Cannot convert NULL value to non-Nullable type"); - } - } - - RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count)); - block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column; + return [this, from_type, to_type](FunctionContext* context, Block& block, + const ColumnNumbers& arguments, const size_t result, + size_t input_rows_count) { + auto nested_result_index = block.columns(); + block.insert(block.get_by_position(result).get_nested()); + auto nested_source_index = block.columns(); + block.insert(block.get_by_position(arguments[0]).get_nested()); + + RETURN_IF_ERROR(prepare_impl(context, remove_nullable(from_type), + remove_nullable(to_type), + true)(context, block, {nested_source_index}, + nested_result_index, input_rows_count)); + + block.get_by_position(result).column = + wrap_in_nullable(block.get_by_position(nested_result_index).column, block, + arguments, result, input_rows_count); + + block.erase(nested_source_index); + block.erase(nested_result_index); return Status::OK(); }; } else { - return wrapper; + return prepare_impl(context, from_type, to_type, false); } } @@ -2359,11 +2305,11 @@ class FunctionCast final : public IFunctionBase { /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. WrapperType prepare_impl(FunctionContext* context, const DataTypePtr& from_type, const DataTypePtr& to_type, bool requested_result_is_nullable) const { - if (from_type->equals(*to_type)) + if (from_type->equals(*to_type)) { return create_identity_wrapper(from_type); - else if (WhichDataType(from_type).is_nothing()) - return create_nothing_wrapper(to_type.get()); + } + // variant needs to be judged first if (to_type->get_type_id() == TypeIndex::VARIANT) { return create_variant_wrapper(from_type, static_cast(*to_type)); } @@ -2371,15 +2317,14 @@ class FunctionCast final : public IFunctionBase { return create_variant_wrapper(static_cast(*from_type), to_type); } - if (from_type->get_type_id() == TypeIndex::JSONB) { - bool jsonb_string_as_string = context ? context->jsonb_string_as_string() : false; + switch (from_type->get_type_id()) { + case TypeIndex::Nothing: + return create_nothing_wrapper(to_type.get()); + case TypeIndex::JSONB: return create_jsonb_wrapper(static_cast(*from_type), to_type, - jsonb_string_as_string); - } - if (to_type->get_type_id() == TypeIndex::JSONB) { - bool string_as_jsonb_string = context ? context->string_as_jsonb_string() : false; - return create_jsonb_wrapper(from_type, static_cast(*to_type), - string_as_jsonb_string); + context ? context->jsonb_string_as_string() : false); + default: + break; } WrapperType ret; @@ -2425,8 +2370,9 @@ class FunctionCast final : public IFunctionBase { return false; }; - if (call_on_index_and_data_type(to_type->get_type_id(), make_default_wrapper)) + if (call_on_index_and_data_type(to_type->get_type_id(), make_default_wrapper)) { return ret; + } switch (to_type->get_type_id()) { case TypeIndex::String: @@ -2446,6 +2392,9 @@ class FunctionCast final : public IFunctionBase { case TypeIndex::BitMap: return create_bitmap_wrapper(context, from_type, static_cast(*to_type)); + case TypeIndex::JSONB: + return create_jsonb_wrapper(from_type, static_cast(*to_type), + context ? context->string_as_jsonb_string() : false); default: break; } diff --git a/be/src/vec/functions/function_rpc.cpp b/be/src/vec/functions/function_rpc.cpp index 8b8b605188ed36..a900436ffc5e9f 100644 --- a/be/src/vec/functions/function_rpc.cpp +++ b/be/src/vec/functions/function_rpc.cpp @@ -101,7 +101,7 @@ Status FunctionRPC::open(FunctionContext* context, FunctionContext::FunctionStat } Status FunctionRPC::execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run) { + size_t result, size_t input_rows_count, bool dry_run) const { RPCFnImpl* fn = reinterpret_cast( context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); return fn->vec_call(context, block, arguments, result, input_rows_count); diff --git a/be/src/vec/functions/function_rpc.h b/be/src/vec/functions/function_rpc.h index d10b9be546bbd7..eda4f6b00b1d78 100644 --- a/be/src/vec/functions/function_rpc.h +++ b/be/src/vec/functions/function_rpc.h @@ -95,7 +95,7 @@ class FunctionRPC : public IFunctionBase { Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override; Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count, bool dry_run = false) override; + size_t result, size_t input_rows_count, bool dry_run = false) const override; bool is_deterministic() const override { return false; } diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp index 34bec39d4ffbee..ac6969c582d167 100644 --- a/be/src/vec/functions/functions_geo.cpp +++ b/be/src/vec/functions/functions_geo.cpp @@ -132,7 +132,7 @@ struct StX { auto pt = point.decode_from(point_value.data, point_value.size); if (!pt) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } auto x_value = point.x(); @@ -164,7 +164,7 @@ struct StY { auto pt = point.decode_from(point_value.data, point_value.size); if (!pt) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } auto y_value = point.y(); @@ -199,7 +199,7 @@ struct StDistanceSphere { x_lat->operator[](row).get(), y_lng->operator[](row).get(), y_lat->operator[](row).get(), &distance)) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&distance), 0); @@ -233,7 +233,7 @@ struct StAngleSphere { x_lat->operator[](row).get(), y_lng->operator[](row).get(), y_lat->operator[](row).get(), &angle)) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&angle), 0); @@ -266,26 +266,26 @@ struct StAngle { auto shape_value1 = p1->get_data_at(row); auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); if (!pt1) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } auto shape_value2 = p2->get_data_at(row); auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); if (!pt2) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } auto shape_value3 = p3->get_data_at(row); auto pt3 = point3.decode_from(shape_value3.data, shape_value3.size); if (!pt3) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } double angle = 0; if (!GeoPoint::ComputeAngle(&point1, &point2, &point3, &angle)) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&angle), 0); @@ -315,20 +315,20 @@ struct StAzimuth { auto shape_value1 = p1->get_data_at(row); auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); if (!pt1) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } auto shape_value2 = p2->get_data_at(row); auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); if (!pt2) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } double angle = 0; if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&angle), 0); @@ -356,13 +356,13 @@ struct StAreaSquareMeters { auto shape_value = col->get_data_at(row); shape = GeoShape::from_encoded(shape_value.data, shape_value.size); if (!shape) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } double area = 0; if (!GeoShape::ComputeArea(shape.get(), &area, "square_meters")) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&area), 0); @@ -391,13 +391,13 @@ struct StAreaSquareKm { auto shape_value = col->get_data_at(row); shape = GeoShape::from_encoded(shape_value.data, shape_value.size); if (!shape) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } double area = 0; if (!GeoShape::ComputeArea(shape.get(), &area, "square_km")) { - res->insert_data(nullptr, 0); + res->insert_default(); continue; } res->insert_data(const_cast((char*)&area), 0); @@ -479,7 +479,7 @@ struct StContains { shapes[i] = std::shared_ptr( GeoShape::from_encoded(strs[i]->data, strs[i]->size)); if (shapes[i] == nullptr) { - res->insert_data(nullptr, 0); + res->insert_default(); break; } } diff --git a/be/src/vec/functions/functions_geo.h b/be/src/vec/functions/functions_geo.h index 11d2cd7f881b97..9c4db09e14967f 100644 --- a/be/src/vec/functions/functions_geo.h +++ b/be/src/vec/functions/functions_geo.h @@ -68,7 +68,6 @@ class GeoFunction : public IFunction { DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared()); } - bool use_default_implementation_for_nulls() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { diff --git a/be/src/vec/functions/functions_logical.cpp b/be/src/vec/functions/functions_logical.cpp index 1eceeac3de4aa7..c0b8d62ec259ba 100644 --- a/be/src/vec/functions/functions_logical.cpp +++ b/be/src/vec/functions/functions_logical.cpp @@ -187,9 +187,8 @@ DataTypePtr FunctionAnyArityLogical::get_return_type_impl( } } - if (!(is_native_number(arg_type) || - (Impl::special_implementation_for_nulls() && - (arg_type->only_null() || is_native_number(remove_nullable(arg_type)))))) { + if (!(is_native_number(arg_type) || (Impl::special_implementation_for_nulls() && + is_native_number(remove_nullable(arg_type))))) { LOG(FATAL) << fmt::format("Illegal type ({}) of {} argument of function {}", arg_type->get_name(), i + 1, get_name()); } diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index 9d8cd318b499cf..06f14ec669ee80 100644 --- a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -190,8 +190,7 @@ struct FunctionFieldImpl { for (int row = 0; row < input_rows_count; ++row) { const auto& str_data = column_string.get_data_at(index_check_const(row, arg_const)); for (int col = 1; col < column_size; ++col) { - auto [column, is_const] = - unpack_if_const(block.safe_get_by_position(col).column); + auto [column, is_const] = unpack_if_const(argument_columns[col]); const auto& temp_data = assert_cast(*column).get_data_at( index_check_const(row, is_const)); if (EqualsOp::apply(temp_data, str_data)) { diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp index 2fccee27d4d312..315ca52d1bc423 100644 --- a/be/src/vec/functions/nullif.cpp +++ b/be/src/vec/functions/nullif.cpp @@ -50,11 +50,6 @@ class FunctionContext; namespace doris::vectorized { class FunctionNullIf : public IFunction { public: - struct NullPresence { - bool has_nullable = false; - bool has_null_constant = false; - }; - static constexpr auto name = "nullif"; static FunctionPtr create() { return std::make_shared(); } @@ -69,33 +64,18 @@ class FunctionNullIf : public IFunction { return make_nullable(arguments[0]); } - NullPresence get_null_resense(const ColumnsWithTypeAndName& args) const { - NullPresence res; - - for (const auto& elem : args) { - if (!res.has_nullable) res.has_nullable = elem.type->is_nullable(); - if (!res.has_null_constant) res.has_null_constant = elem.type->only_null(); - } - - return res; - } - - DataTypePtr get_return_type_for_equal(const ColumnsWithTypeAndName& arguments) const { + static DataTypePtr get_return_type_for_equal(const ColumnsWithTypeAndName& arguments) { ColumnsWithTypeAndName args_without_low_cardinality(arguments); for (ColumnWithTypeAndName& arg : args_without_low_cardinality) { bool is_const = arg.column && is_column_const(*arg.column); - if (is_const) + if (is_const) { arg.column = assert_cast(*arg.column).remove_low_cardinality(); + } } if (!arguments.empty()) { - NullPresence null_presence = get_null_resense(arguments); - - if (null_presence.has_null_constant) { - return make_nullable(std::make_shared()); - } - if (null_presence.has_nullable) { + if (get_null_presence(arguments)) { return make_nullable(std::make_shared()); } } diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 5e64268276e212..141a5c54b64a59 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -202,7 +202,7 @@ class VDataStreamRecvr::SenderQueue { _local_channel_dependency = local_channel_dependency; } - virtual bool should_wait(); + bool should_wait(); virtual Status get_batch(Block* next_block, bool* eos);