Skip to content

Commit

Permalink
[fix](storage) low_cardinality_optimize core dump when is null predic…
Browse files Browse the repository at this point in the history
…ate (apache#9586)

Issue Number: close apache#9555
Make the last value of the dictionary null. When ColumnDict inserts a null value,
add the encoding corresponding to the last value of the dictionary.
  • Loading branch information
zenoyang authored and minghong.zhou committed May 23, 2022
1 parent 4e5f75d commit feef370
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 17 deletions.
7 changes: 5 additions & 2 deletions be/src/olap/comparison_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)

// todo(zeno) define interface in IColumn to simplify code
// If `1 OP 0` evaluates to true, the predicate is > or >=.
// If `1 OP 1` evaluates to true, the predicate is >= or <=.
// Deriving both flags from OP this way avoids redundant code.
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \
template <class T> \
void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
Expand All @@ -161,7 +164,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
auto dict_code = \
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: nested_col_ptr->find_code(_value); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
Expand Down Expand Up @@ -190,7 +193,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
column); \
auto& data_array = dict_col.get_data(); \
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) \
: dict_col.find_code(_value); \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
Expand Down
28 changes: 13 additions & 15 deletions be/src/vec/columns/column_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,12 +98,10 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
}

void insert_data(const char* pos, size_t /*length*/) override {
_codes.push_back(unaligned_load<T>(pos));
LOG(FATAL) << "insert_data not supported in ColumnDictionary";
}

void insert_data(const T value) { _codes.push_back(value); }

void insert_default() override { _codes.push_back(T()); }
void insert_default() override { _codes.push_back(_dict.get_null_code()); }

void clear() override {
_codes.clear();
Expand Down Expand Up @@ -219,13 +217,12 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
void insert_many_dict_data(const int32_t* data_array, size_t start_index,
const StringRef* dict_array, size_t data_num,
uint32_t dict_num) override {
if (!is_dict_inited()) {
if (_dict.empty()) {
_dict.reserve(dict_num);
for (uint32_t i = 0; i < dict_num; ++i) {
auto value = StringValue(dict_array[i].data, dict_array[i].size);
_dict.insert_value(value);
}
_dict_inited = true;
}

char* end_ptr = (char*)_codes.get_end_ptr();
Expand Down Expand Up @@ -263,8 +260,6 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
return _dict.find_codes(values);
}

bool is_dict_inited() const { return _dict_inited; }

// Returns the current value of the _dict_sorted flag.
bool is_dict_sorted() const { return _dict_sorted; }

// Returns the current value of the _dict_code_converted flag.
bool is_dict_code_converted() const { return _dict_code_converted; }
Expand Down Expand Up @@ -301,13 +296,17 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
if (it != _inverted_index.end()) {
return it->second;
}
return -1;
return -2; // -1 is null code
}

inline StringValue& get_value(T code) { return _dict_data[code]; }
// Sentinel dictionary code that stands for a null value: -1 converted to T.
T get_null_code() { return static_cast<T>(-1); }

// Looks up the dictionary value for `code`.
// Any code that does not index into _dict_data (i.e. compares >= its size)
// yields the shared _null_value instead of an out-of-bounds access.
inline StringValue& get_value(T code) {
    if (code >= _dict_data.size()) {
        return _null_value;
    }
    return _dict_data[code];
}

inline void generate_hash_values() {
if (_hash_values.size() == 0) {
if (_hash_values.empty()) {
_hash_values.resize(_dict_data.size());
for (size_t i = 0; i < _dict_data.size(); i++) {
auto& sv = _dict_data[i];
Expand Down Expand Up @@ -380,7 +379,10 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {

// Bytes occupied by the entries of _dict_data: entry count times the size of one entry.
size_t byte_size() {
    const size_t entry_count = _dict_data.size();
    return entry_count * sizeof(_dict_data[0]);
}

// True when _dict_data holds no entries.
bool empty() { return _dict_data.empty(); }

private:
StringValue _null_value = StringValue();
StringValue::Comparator _comparator;
// dict code -> dict value
DictContainer _dict_data;
Expand All @@ -398,16 +400,12 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {

private:
size_t _reserve_size;
bool _dict_inited = false;
bool _dict_sorted = false;
bool _dict_code_converted = false;
Dictionary _dict;
Container _codes;
};

template class ColumnDictionary<uint8_t>;
template class ColumnDictionary<uint16_t>;
template class ColumnDictionary<uint32_t>;
template class ColumnDictionary<int32_t>;

using ColumnDictI32 = vectorized::ColumnDictionary<doris::vectorized::Int32>;
Expand Down

0 comments on commit feef370

Please sign in to comment.