Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](inverted index) ignore_above only affects untokenized strings #28819

Merged
merged 1 commit into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,15 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
"field or index writer is null in inverted index writer");
}
auto* v = (Slice*)values;
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
for (int i = 0; i < count; ++i) {
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
if (v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << v->get_size() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
RETURN_IF_ERROR(add_null_document());
Expand Down Expand Up @@ -330,6 +333,9 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
return Status::InternalError(
"field or index writer is null in inverted index writer");
}
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
for (int i = 0; i < count; ++i) {
// offsets[i+1] is now row element count
std::vector<std::string> strings;
Expand All @@ -346,11 +352,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
}

auto value = join(strings, " ");
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
if (value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << value.length() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
RETURN_IF_ERROR(add_null_document());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Loading