Skip to content

Commit

Permalink
support load json index after loadsegment
Browse files Browse the repository at this point in the history
Signed-off-by: Xianhui.Lin <[email protected]>

improve statschecker unittest

Signed-off-by: Xianhui.Lin <[email protected]>

jsonindex expr code format

Signed-off-by: Xianhui.Lin <[email protected]>

fix go format

Signed-off-by: Xianhui.Lin <[email protected]>

fix controllerbasetest fail

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex memory leak

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonkey go format

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex go codeformat

Signed-off-by: Xianhui.Lin <[email protected]>

improve jsoninvert unit test

Signed-off-by: Xianhui.Lin <[email protected]>

delete unused code

Signed-off-by: Xianhui.Lin <[email protected]>

refine test_json_key_index

Signed-off-by: Xianhui.Lin <[email protected]>

fix cpp unit test

Signed-off-by: Xianhui.Lin <[email protected]>

delete loginfo

Signed-off-by: Xianhui.Lin <[email protected]>

fix compile error

Signed-off-by: Xianhui.Lin <[email protected]>

fix codeformat

Signed-off-by: Xianhui.Lin <[email protected]>
  • Loading branch information
JsDove committed Dec 26, 2024
1 parent b9b1cbd commit 5a23c42
Show file tree
Hide file tree
Showing 41 changed files with 1,624 additions and 1,388 deletions.
73 changes: 1 addition & 72 deletions internal/core/src/common/Json.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,35 +71,6 @@ ExtractSubJson(const std::string& json, const std::vector<std::string>& keys) {
return buffer.GetString();
}

// Splits a JSON pointer into its first reference token and the remainder.
//
// @param json_pointer  Pointer text; must be empty or start with '/'.
// @param escaped       When true, the first token is decoded by replacing
//                      every "~1" with "/" and every "~0" with "~".
// @return {top_key, remaining_path}. `remaining_path` keeps its leading '/'
//         and is empty when the pointer has a single token. An empty
//         pointer yields {"", ""}.
inline std::pair<std::string, std::string>
ParseTopLevelKey(const std::string& json_pointer, bool escaped = false) {
    if (json_pointer.empty()) {
        return {"", ""};
    }

    Assert(json_pointer[0] == '/');
    const size_t start = 1;
    const size_t end = json_pointer.find('/', start);

    std::string top_key = (end == std::string::npos)
                              ? json_pointer.substr(start)
                              : json_pointer.substr(start, end - start);

    if (escaped) {
        // Decode in a single left-to-right pass so that every escape is
        // handled (not just the first one) and so that the output of one
        // substitution is never re-scanned: "~01" must decode to "~1",
        // not to "/".
        std::string decoded;
        decoded.reserve(top_key.size());
        for (size_t i = 0; i < top_key.size(); ++i) {
            const char c = top_key[i];
            if (c == '~' && i + 1 < top_key.size()) {
                const char next = top_key[i + 1];
                if (next == '0') {
                    decoded.push_back('~');
                    ++i;
                    continue;
                }
                if (next == '1') {
                    decoded.push_back('/');
                    ++i;
                    continue;
                }
            }
            // Not an escape sequence (or a dangling '~'): copy verbatim.
            decoded.push_back(c);
        }
        top_key.swap(decoded);
    }

    std::string remaining_path =
        (end == std::string::npos) ? "" : json_pointer.substr(end);

    return {top_key, remaining_path};
}

static std::string
ToLower(const std::string_view& str) {
std::string result(str);
Expand Down Expand Up @@ -230,8 +201,7 @@ class Json {

// it's always safe to add the padding,
// as we have allocated the memory with this padding
auto doc = parser.parse(data_.data() + offset,
length + simdjson::SIMDJSON_PADDING);
auto doc = parser.parse(data_.data() + offset, length);
AssertInfo(doc.error() == simdjson::SUCCESS,
"failed to parse the json {}: {}",
std::string(data_.data() + offset, length),
Expand Down Expand Up @@ -288,47 +258,6 @@ class Json {
return doc(offset, length).get<T>();
}

// Interprets the `length` bytes starting at `offset` in `data_` as a scalar
// of type T, without going through the JSON parser.
//
// Supported T: bool, int64_t, double, std::string_view (any other type
// fails the static_assert below).
//
// @return {value, error}. `error` is empty on success; on failure `value`
//         is a zero/false placeholder and `error` describes the problem.
//
// Numeric parsing works on a copy bounded by `length`, so bytes that follow
// the token in the buffer are never read or parsed.
template <typename T>
std::pair<T, std::string>
at_pos(uint16_t offset, uint16_t length) const {
    const char* pos = data_.data() + offset;
    std::string_view str(pos, length);
    if constexpr (std::is_same_v<T, bool>) {
        // Case-insensitive match; lower-case once and compare twice.
        const std::string lowered = milvus::ToLower(str);
        if (lowered == "true") {
            return {true, ""};
        } else if (lowered == "false") {
            return {false, ""};
        } else {
            return {false, "invalid boolean value"};
        }
    } else if constexpr (std::is_same_v<T, int64_t>) {
        try {
            // std::stoll needs a NUL-terminated string; passing `pos`
            // directly would scan past the token up to the next NUL.
            const std::string token(str);
            size_t parsed_chars = 0;
            int64_t int_value = std::stoll(token, &parsed_chars, 10);
            if (parsed_chars == length) {
                return {int_value, ""};
            }
            return {0, "string contains non-integer characters"};
        } catch (...) {
            // std::stoll throws on non-numeric input or out-of-range values.
            return {0, "invalid integer string"};
        }
    } else if constexpr (std::is_same_v<T, double>) {
        try {
            // Same bounded copy as the integer branch.
            const std::string token(str);
            size_t parsed_chars = 0;
            double double_value = std::stod(token, &parsed_chars);
            if (parsed_chars == length) {
                return {double_value, ""};
            }
            return {0, "string contains non-integer characters"};
        } catch (...) {
            return {0, "invalid double string"};
        }
    } else {
        static_assert(std::is_same_v<std::string_view, T>);
        return {str, ""};
    }
}

value_result<simdjson::dom::array>
array_at(uint16_t offset, uint16_t length) const {
return dom_doc(offset, length).get_array();
Expand Down
11 changes: 11 additions & 0 deletions internal/core/src/common/jsmn.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/*
* MIT License
*
Expand Down
69 changes: 34 additions & 35 deletions internal/core/src/exec/expression/BinaryRangeExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,17 +260,17 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

auto execute_sub_batch = [lower_inclusive,
upper_inclusive]<FilterType filter_type =
FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, true, true, filter_type> func;
func(val1, val2, data, size, res, offsets);
Expand Down Expand Up @@ -365,18 +365,17 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

auto execute_sub_batch = [lower_inclusive,
upper_inclusive,
pointer]<FilterType filter_type =
FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
auto execute_sub_batch =
[ lower_inclusive, upper_inclusive,
pointer ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
func;
Expand Down Expand Up @@ -539,18 +538,18 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
index = std::stoi(expr_->column_.nested_path_[0]);
}

auto execute_sub_batch = [lower_inclusive,
upper_inclusive]<FilterType filter_type =
FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
func;
Expand Down
22 changes: 11 additions & 11 deletions internal/core/src/exec/expression/ExistsExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,18 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer) {
for (int i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = data[offset].exist(pointer);
for (int i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
};
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = data[offset].exist(pointer);
}
};

int64_t processed_size;
if (has_offset_input_) {
Expand Down
Loading

0 comments on commit 5a23c42

Please sign in to comment.