Skip to content

Commit

Permalink
Lexicographical comparison for string queries (#7273)
Browse files Browse the repository at this point in the history
* support lexicographical comparison for string queries

* fix RQL string comparisons

* lint and comment
  • Loading branch information
ironage authored Jan 26, 2024
1 parent 683e868 commit 666ba2d
Show file tree
Hide file tree
Showing 8 changed files with 141 additions and 30 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

### Enhancements
* <New feature description> (PR [#????](https://github.com/realm/realm-core/pull/????))
* None.
* Allow the query builder to construct >, >=, <, <= queries for string constants. This is a case sensitive lexicographical comparison. Improved performance of RQL (parsed) queries on a non-linked string property using the >, >=, <, <= operators, and fixed the behaviour so that a null string is evaluated as less than everything; previously nulls were not matched. ([#3939](https://github.com/realm/realm-core/issues/3939), this is a prerequisite for https://github.com/realm/realm-swift/issues/8008).

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand All @@ -17,6 +17,7 @@
-----------

### Internals
* Expressions in `CHECK()` macros are printed better; strings are quoted and null strings are printed as NULL instead of no output.
* Refactored version resolution for the `build-apple-device.sh` script. ([#7263](https://github.com/realm/realm-core/pull/7263))
* Remove SyncUser::binding_context() and related things, which were not actually used by any SDKs.

Expand Down
2 changes: 1 addition & 1 deletion src/realm/parser/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ Query RelationalNode::visit(ParserDriver* drv)
case type_Bool:
break;
case type_String:
break;
return drv->simple_query(op, col_key, right->get_mixed().get_string());
case type_Binary:
break;
case type_Timestamp:
Expand Down
24 changes: 24 additions & 0 deletions src/realm/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,30 @@ Query& Query::fulltext(ColKey column_key, StringData value, const LinkMap& link_
return *this;
}

// Adds a `Greater` condition on the string column `column_key` against the
// constant `value`, then returns this query.
// Per the changelog entry for this change, the comparison is a case sensitive
// lexicographical one and a null string is evaluated as less than everything.
Query& Query::greater(ColKey column_key, StringData value)
{
add_condition<Greater>(column_key, value);
return *this;
}

// Adds a `GreaterEqual` condition on the string column `column_key` against the
// constant `value`, then returns this query.
// Per the changelog entry for this change, the comparison is a case sensitive
// lexicographical one and a null string is evaluated as less than everything.
Query& Query::greater_equal(ColKey column_key, StringData value)
{
add_condition<GreaterEqual>(column_key, value);
return *this;
}

// Adds a `Less` condition on the string column `column_key` against the
// constant `value`, then returns this query.
// Per the changelog entry for this change, the comparison is a case sensitive
// lexicographical one and a null string is evaluated as less than everything.
Query& Query::less(ColKey column_key, StringData value)
{
add_condition<Less>(column_key, value);
return *this;
}

// Adds a `LessEqual` condition on the string column `column_key` against the
// constant `value`, then returns this query.
// Per the changelog entry for this change, the comparison is a case sensitive
// lexicographical one and a null string is evaluated as less than everything.
Query& Query::less_equal(ColKey column_key, StringData value)
{
add_condition<LessEqual>(column_key, value);
return *this;
}

// Aggregates =================================================================================

bool Query::eval_object(const Obj& obj) const
Expand Down
5 changes: 5 additions & 0 deletions src/realm/query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,11 @@ class Query final {
Query& like(ColKey column_key, StringData value, bool case_sensitive = true);
Query& fulltext(ColKey column_key, StringData value);
Query& fulltext(ColKey column_key, StringData value, const LinkMap&);
// Ordering comparisons of a string column against a string constant
// (>, >=, <, <=). Per the changelog entry for this change these are case
// sensitive lexicographical comparisons.
Query& greater(ColKey column_key, StringData value);
Query& greater_equal(ColKey column_key, StringData value);
Query& less(ColKey column_key, StringData value);
Query& less_equal(ColKey column_key, StringData value);


// These are shortcuts for equal(StringData(c_str)) and
// not_equal(StringData(c_str)), and are needed to avoid unwanted
Expand Down
12 changes: 6 additions & 6 deletions src/realm/query_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ void StringNode<Equal>::_search_index_init()
{
REALM_ASSERT(bool(m_index_evaluator));
auto index = ParentNode::m_table.unchecked_ptr()->get_search_index(ParentNode::m_condition_column_key);
m_index_evaluator->init(index, StringData(StringNodeBase::m_value));
m_index_evaluator->init(index, StringNodeBase::m_string_value);
}

bool StringNode<Equal>::do_consume_condition(ParentNode& node)
Expand All @@ -392,7 +392,7 @@ bool StringNode<Equal>::do_consume_condition(ParentNode& node)
REALM_ASSERT(m_condition_column_key == other.m_condition_column_key);
REALM_ASSERT(other.m_needles.empty());
if (m_needles.empty()) {
m_needles.insert(m_value ? StringData(*m_value) : StringData());
m_needles.insert(m_string_value);
}
if (auto& str = other.m_value) {
m_needle_storage.push_back(std::make_unique<char[]>(str->size()));
Expand All @@ -408,7 +408,7 @@ bool StringNode<Equal>::do_consume_condition(ParentNode& node)
size_t StringNode<Equal>::_find_first_local(size_t start, size_t end)
{
if (m_needles.empty()) {
return m_leaf->find_first(m_value, start, end);
return m_leaf->find_first(m_string_value, start, end);
}
else {
if (end == npos)
Expand Down Expand Up @@ -442,7 +442,7 @@ void StringNode<EqualIns>::_search_index_init()
auto index = ParentNode::m_table->get_search_index(ParentNode::m_condition_column_key);
m_index_matches.clear();
constexpr bool case_insensitive = true;
index->find_all(m_index_matches, StringData(StringNodeBase::m_value), case_insensitive);
index->find_all(m_index_matches, StringNodeBase::m_string_value, case_insensitive);
m_index_evaluator->init(&m_index_matches);
}

Expand All @@ -452,7 +452,7 @@ size_t StringNode<EqualIns>::_find_first_local(size_t start, size_t end)
for (size_t s = start; s < end; ++s) {
StringData t = get_string(s);

if (cond(StringData(m_value), m_ucase.c_str(), m_lcase.c_str(), t))
if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), t))
return s;
}

Expand Down Expand Up @@ -484,7 +484,7 @@ void StringNodeFulltext::_search_index_init()
auto index = m_link_map->get_target_table()->get_search_index(ParentNode::m_condition_column_key);
REALM_ASSERT(index && index->is_fulltext_index());
m_index_matches.clear();
index->find_all_fulltext(m_index_matches, StringData(StringNodeBase::m_value));
index->find_all_fulltext(m_index_matches, StringNodeBase::m_string_value);

// If links exists, use backlinks to find the original objects
if (m_link_map->links_exist()) {
Expand Down
29 changes: 20 additions & 9 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,7 @@ class StringNodeBase : public ParentNode {

StringNodeBase(StringData v, ColKey column)
: m_value(v.is_null() ? util::none : util::make_optional(std::string(v)))
, m_string_value(m_value)
{
m_condition_column_key = column;
m_dT = 10.0;
Expand Down Expand Up @@ -1510,24 +1511,22 @@ class StringNodeBase : public ParentNode {
// Copy constructor: copies the base node state, the stored string constant and
// the string-enum flag from `from`. m_leaf is not copied; it is left
// default-initialised.
StringNodeBase(const StringNodeBase& from)
: ParentNode(from)
, m_value(from.m_value)
// Built from this object's own m_value rather than from.m_value. This relies on
// m_value being declared (and therefore initialised) before m_string_value.
// NOTE(review): presumably StringData is a non-owning view, so it must refer to
// the copy held by this node and not to storage owned by `from` - confirm.
, m_string_value(m_value)
, m_is_string_enum(from.m_is_string_enum)
{
}

// Serialises this condition as "<column> <condition> <value>".
//
// The column text comes from state.describe_column(), the operator text from
// describe_condition(), and the constant is rendered by
// util::serializer::print_value() from m_string_value, which the constructors
// initialise from m_value.
// Asserts that the condition column key has been set.
std::string describe(util::serializer::SerialisationState& state) const override
{
    REALM_ASSERT(m_condition_column_key);
    return state.describe_column(ParentNode::m_table, m_condition_column_key) + " " + describe_condition() + " " +
           util::serializer::print_value(m_string_value);
}

protected:
// Owned copy of the query constant; holds no value when the constant is null.
std::optional<std::string> m_value;
std::optional<ArrayString> m_leaf;
// The query constant as a StringData, initialised from m_value in the
// constructors. It must stay declared after m_value so that m_value is
// initialised first.
// NOTE(review): presumably a non-owning view into m_value, in which case any
// code that reassigns m_value must refresh this member as well - confirm.
StringData m_string_value;

bool m_is_string_enum = false;

Expand All @@ -1545,9 +1544,14 @@ class StringNodeBase : public ParentNode {
template <class TConditionFunction>
class StringNode : public StringNodeBase {
public:
constexpr static bool case_sensitive_comparison =
is_any_v<TConditionFunction, Greater, GreaterEqual, Less, LessEqual>;
StringNode(StringData v, ColKey column)
: StringNodeBase(v, column)
{
if constexpr (case_sensitive_comparison) {
return;
}
auto upper = case_map(v, true);
auto lower = case_map(v, false);
if (!upper || !lower) {
Expand All @@ -1572,8 +1576,15 @@ class StringNode : public StringNodeBase {
for (size_t s = start; s < end; ++s) {
StringData t = get_string(s);

if (cond(StringData(m_value), m_ucase.c_str(), m_lcase.c_str(), t))
return s;
if constexpr (case_sensitive_comparison) {
// case insensitive not implemented for: >, >=, <, <=
if (cond(t, m_string_value))
return s;
}
else {
if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), t))
return s;
}
}
return not_found;
}
Expand Down Expand Up @@ -1638,7 +1649,7 @@ class StringNode<Contains> : public StringNodeBase {
for (size_t s = start; s < end; ++s) {
StringData t = get_string(s);

if (cond(StringData(m_value), m_charmap, t))
if (cond(m_string_value, m_charmap, t))
return s;
}
return not_found;
Expand Down Expand Up @@ -1719,7 +1730,7 @@ class StringNode<ContainsIns> : public StringNodeBase {
if (!bool(m_value)) {
return s;
}
if (cond(StringData(m_value), m_ucase.c_str(), m_lcase.c_str(), m_charmap, t))
if (cond(m_string_value, m_ucase.c_str(), m_lcase.c_str(), m_charmap, t))
return s;
}
return not_found;
Expand Down
53 changes: 53 additions & 0 deletions test/test_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,59 @@ TEST(Query_NextGen_StringConditions)
cnt = table2->column<String>(col_str3).like(realm::null(), false).count();
CHECK_EQUAL(cnt, 1);

auto check_results = [&](Query q, std::vector<StringData>&& matches) {
TableView view = q.find_all();
std::sort(matches.begin(), matches.end());
std::vector<StringData> actual;
for (size_t i = 0; i < view.size(); ++i) {
actual.push_back(view.get_object(i).get<StringData>(col_str3));
}
std::sort(actual.begin(), actual.end());
if (!CHECK_EQUAL(actual, matches)) {
util::format(std::cout, "failed query '%1'\n", q.get_description());
}
TableView parsed_results = table2->query(q.get_description()).find_all();
std::vector<StringData> parsed_matches;
for (size_t i = 0; i < parsed_results.size(); ++i) {
parsed_matches.push_back(parsed_results.get_object(i).get<StringData>(col_str3));
}
std::sort(parsed_matches.begin(), parsed_matches.end());
if (!CHECK_EQUAL(parsed_matches, matches)) {
util::format(std::cout, "failed parsed query '%1'\n", q.get_description());
}
};

// greater
check_results((table2->column<String>(col_str3) > StringData("")), {"foo", "bar", "!"});
check_results((table2->column<String>(col_str3) > StringData("b")), {"foo", "bar"});
check_results((table2->column<String>(col_str3) > StringData("bar")), {"foo"});
check_results((table2->column<String>(col_str3) > StringData("barrr")), {"foo"});
check_results((table2->column<String>(col_str3) > StringData("bb")), {"foo"});
check_results((table2->column<String>(col_str3) > StringData("z")), {});

// less
check_results((table2->column<String>(col_str3) < StringData("")), {StringData()});
check_results((table2->column<String>(col_str3) < StringData("b")), {"", "!", StringData()});
check_results((table2->column<String>(col_str3) < StringData("bar")), {"", "!", StringData()});
check_results((table2->column<String>(col_str3) < StringData("barrr")), {"bar", "", "!", StringData()});
check_results((table2->column<String>(col_str3) < StringData("z")), {"foo", "bar", "", "!", StringData()});
check_results((table2->column<String>(col_str3) < StringData("f")), {"bar", "", "!", StringData()});
check_results((table2->column<String>(col_str3) < StringData("fp")), {"foo", "bar", "", "!", StringData()});

// greater equal
check_results((table2->column<String>(col_str3) >= StringData("")), {"foo", "bar", "!", ""});
check_results((table2->column<String>(col_str3) >= StringData("b")), {"foo", "bar"});
check_results((table2->column<String>(col_str3) >= StringData("bar")), {"foo", "bar"});
check_results((table2->column<String>(col_str3) >= StringData("barrrr")), {"foo"});
check_results((table2->column<String>(col_str3) >= StringData("z")), {});

// less equal
check_results((table2->column<String>(col_str3) <= StringData("")), {StringData(), ""});
check_results((table2->column<String>(col_str3) <= StringData("b")), {"", "!", StringData()});
check_results((table2->column<String>(col_str3) <= StringData("bar")), {"bar", "", "!", StringData()});
check_results((table2->column<String>(col_str3) <= StringData("barrrr")), {"bar", "", "!", StringData()});
check_results((table2->column<String>(col_str3) <= StringData("z")), {"foo", "bar", "", "!", StringData()});

TableRef table3 = group.add_table(StringData("table3"));
auto col_link1 = table3->add_column(*table2, "link1");

Expand Down
43 changes: 30 additions & 13 deletions test/util/unit_test.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <realm/util/features.h>
#include <realm/util/logger.hpp>
#include <realm/util/safe_int_ops.hpp>
#include <realm/util/serializer.hpp>


#define TEST(name) TEST_IF(name, true)
Expand Down Expand Up @@ -751,7 +752,6 @@ inline bool definitely_less(long double a, long double b, long double epsilon)
return b - a > std::max(std::abs(a), std::abs(b)) * epsilon;
}


template <class T, bool is_float>
struct SetPrecision {
static void exec(std::ostream&)
Expand All @@ -767,14 +767,23 @@ struct SetPrecision<T, true> {
}
};

// True when T is one of the value types that the to_string() overloads in this
// header render through util::serializer::print_value() instead of operator<<.
// Declared `inline constexpr` rather than `static` so that this header-defined
// constant is a single entity across translation units instead of one
// internal-linkage copy per including file.
template <typename T>
inline constexpr bool realm_serializable_types =
    is_any_v<T, StringData, BinaryData, Timestamp, ObjectId, std::optional<ObjectId>, ObjKey, ObjLink, UUID,
             std::optional<UUID>, bool, float, std::optional<float>, double, std::optional<double>, realm::null>;

// Renders `value` as text into `str`.
//
// Types listed in realm_serializable_types are formatted with
// util::serializer::print_value(); per the changelog entry for this change that
// quotes strings and prints null strings as NULL. Every other type is streamed
// with operator<< into a string stream, after SetPrecision has been applied for
// floating point types.
//
// The value is formatted exactly once: the previous unconditional
// stream-then-assign sequence ahead of the `if constexpr` was redundant (its
// result was always overwritten) and required operator<< even for types that
// are handled by the serializer.
template <class T>
void to_string(const T& value, std::string& str)
{
    if constexpr (realm_serializable_types<T>) {
        str = util::serializer::print_value(value);
    }
    else {
        std::ostringstream out;
        SetPrecision<T, std::is_floating_point<T>::value>::exec(out);
        out << value;
        str = out.str();
    }
}

template <class T>
Expand All @@ -789,7 +798,12 @@ void to_string(const std::vector<T>& value, std::string& str)
if (!first) {
out << ", ";
}
out << v;
if constexpr (realm_serializable_types<T>) {
out << util::serializer::print_value(v);
}
else {
out << v;
}
first = false;
}
out << "}";
Expand All @@ -799,14 +813,17 @@ void to_string(const std::vector<T>& value, std::string& str)
// Renders an optional `value` as text into `str`.
//
// When the contained type T is listed in realm_serializable_types the optional
// is formatted with util::serializer::print_value(). Otherwise it is written
// with util::stream_possible_optional() into a string stream, after
// SetPrecision has been applied for floating point types.
//
// The value is formatted exactly once: the previous unconditional
// stream-then-assign sequence ahead of the `if constexpr` was redundant, since
// its result was always overwritten by one of the two branches.
template <class T>
void to_string(const std::optional<T>& value, std::string& str)
{
    if constexpr (realm_serializable_types<T>) {
        str = util::serializer::print_value(value);
    }
    else {
        std::ostringstream out;
        SetPrecision<T, std::is_floating_point<T>::value>::exec(out);
        util::stream_possible_optional(out, value);
        str = out.str();
    }
}


inline bool TestContext::check_cond(bool cond, const char* file, long line, const char* macro_name,
const char* cond_text)
{
Expand Down

0 comments on commit 666ba2d

Please sign in to comment.