Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use index on int equal queries #3272

Merged
merged 4 commits into from
Apr 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@
* Improved query performance for unindexed integer columns when the query has a chain of OR conditions.
This will improve performance of "IN" queries generated by SDKs.
([PR #2888](https://github.com/realm/realm-sync/issues/2888)).
* Use search index in queries on integer columns (equality only). This will improve performance of
queries on integer primary key properties for example. ([PR #3272](https://github.com/realm/realm-core/pull/3272)).

### Fixed
* <How to hit and notice issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
* Writing a snapshot to file via Group::write() could produce a file with some parts not
reachable from top array (a memory leak). ([#2911](https://github.com/realm/realm-sync/issues/2911))
* Fixed a bug in queries on a string column with more than two "or" equality conditions when the last condition also had an
"and" clause. For example: `first == "a" || (first == "b" && second == 1)` would be incorrectly evaluated as
`(first == "a" || first == "b")`. ([#3271](https://github.com/realm/realm-core/pull/3271), since v5.17.0)

### Breaking changes
* None.
Expand Down
92 changes: 69 additions & 23 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,27 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {
: BaseType(value, column_ndx)
{
}
// Frees the column of index matches, provided one is currently attached.
~IntegerNode()
{
    if (!m_result.is_attached())
        return;
    m_result.destroy();
}

// Prepares the node for a query run.
//
// Caches the number of needles and, when the condition column has a search
// index, collects every row matching m_value into m_result so that
// find_first_local() can walk those matches instead of scanning leaves.
void init() override
{
    BaseType::init();
    m_nb_needles = m_needles.size();

    if (has_search_index()) {
        // If init() runs again on the same node, m_result still owns the
        // matches of the previous run. Free them before re-pointing m_result,
        // otherwise that memory is orphaned (the destructor only ever sees the
        // most recent column).
        if (m_result.is_attached()) {
            m_result.destroy();
        }

        ref_type ref = IntegerColumn::create(Allocator::get_default());
        m_result.init_from_ref(Allocator::get_default(), ref);

        IntegerNodeBase<ColType>::m_condition_column->find_all(m_result, this->m_value, 0, realm::npos);
        // Restart the cursor over the freshly collected matches.
        m_index_get = 0;
        m_index_end = m_result.size();
    }
}

void consume_condition(IntegerNode<ColType, Equal>* other)
{
Expand All @@ -904,7 +925,22 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {
size_t find_first_local(size_t start, size_t end) override
{
REALM_ASSERT(this->m_table);
size_t nb_needles = m_needles.size();

if (has_search_index()) {
while (m_index_get < m_index_end) {
// m_results are stored in sorted ascending order, guaranteed by the string index
size_t ndx = size_t(m_result.get(m_index_get));
if (ndx >= end) {
break;
}
m_index_get++;
if (ndx >= start) {
return ndx;
}
}
return not_found;
}


while (start < end) {
// Cache internal leaves
Expand All @@ -918,28 +954,8 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {

auto start2 = start - this->m_leaf_start;
size_t s = realm::npos;
if (nb_needles) {
const auto not_in_set = m_needles.end();
bool search = nb_needles < 22;
auto cmp_fn = [this, search, not_in_set](const auto& v) {
if (search) {
for (auto it = m_needles.begin(); it != not_in_set; ++it) {
if (*it == v)
return true;
}
return false;
}
else {
return (m_needles.find(v) != not_in_set);
}
};
for (size_t i = start2; i < end2; ++i) {
auto val = this->m_leaf_ptr->get(i);
if (cmp_fn(val)) {
s = i;
break;
}
}
if (m_nb_needles) {
s = find_first_haystack(start2, end2);
}
else if (end2 - start2 == 1) {
if (this->m_leaf_ptr->get(start2) == this->m_value) {
Expand Down Expand Up @@ -991,12 +1007,42 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {

private:
// Set of values a leaf value is compared against in find_first_haystack().
std::unordered_set<TConditionValue> m_needles;
// Row indexes collected by find_all() in init() when the column has a search index.
IntegerColumn m_result;
// m_needles.size() as cached by init().
size_t m_nb_needles = 0;
// Cursor into m_result: position of the next match find_first_local() will look at.
size_t m_index_get = 0;
// m_result.size() as captured by init(); upper bound for m_index_get.
size_t m_index_end = 0;

// Copy-style constructor taking QueryNodeHandoverPatches: forwards `from` and `patches`
// to BaseType and copies the needle set.
// m_result, m_nb_needles, m_index_get and m_index_end are not copied here; init()
// computes them for the new node.
IntegerNode(const IntegerNode<ColType, Equal>& from, QueryNodeHandoverPatches* patches)
: BaseType(from, patches)
, m_needles(from.m_needles)
{
}
size_t find_first_haystack(size_t start, size_t end)
{
const auto not_in_set = m_needles.end();
// for a small number of conditions, it is faster to do a linear search than to compute the hash
// the decision threshold was determined experimentally to be 22 conditions
bool search = m_nb_needles < 22;
auto cmp_fn = [this, search, not_in_set](const auto& v) {
if (search) {
for (auto it = m_needles.begin(); it != not_in_set; ++it) {
if (*it == v)
return true;
}
return false;
}
else {
return (m_needles.find(v) != not_in_set);
}
};
for (size_t i = start; i < end; ++i) {
auto val = this->m_leaf_ptr->get(i);
if (cmp_fn(val)) {
return i;
}
}
return realm::npos;
}
};


Expand Down
95 changes: 94 additions & 1 deletion test/benchmark-common-tasks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,96 @@ struct BenchmarkWithInts : BenchmarkWithIntsTable {
}
};

// Benchmark: a single query built from `num_queried_matches` OR-ed integer
// equality conditions on column 0 of the "IntOnly" table.
struct BenchmarkQueryChainedOrInts : BenchmarkWithIntsTable {
    const size_t num_queried_matches = 1000;
    const size_t num_rows = 100000;
    // Values the query looks for; filled by before_all().
    std::vector<int64_t> values_to_query;

    const char* name() const
    {
        return "QueryChainedOrInts";
    }

    // Adds `num_rows` rows, stores each row's own index in column 0 for rows
    // 0..num_rows-1, and records every (num_rows / num_queried_matches)-th
    // value as one to query for.
    void before_all(SharedGroup& group)
    {
        BenchmarkWithIntsTable::before_all(group);
        WriteTransaction tr(group);
        TableRef t = tr.get_table("IntOnly");
        t->add_empty_row(num_rows);
        REALM_ASSERT(num_rows > num_queried_matches);
        for (size_t i = 0; i < num_rows; ++i) {
            t->set_int(0, i, int64_t(i));
        }

        // Distance between two consecutive rows whose value gets queried.
        const size_t stride = num_rows / num_queried_matches;
        values_to_query.reserve(num_queried_matches);
        for (size_t i = 0; i < num_queried_matches; ++i) {
            values_to_query.push_back(t->get_int(0, stride * i));
        }
        tr.commit();
    }

    // Runs the OR-chained query and checks that it matches one row per queried value.
    void operator()(SharedGroup& group)
    {
        ReadTransaction tr(group);
        ConstTableRef table = tr.get_table("IntOnly");
        Query query = table->where();
        for (size_t i = 0; i < values_to_query.size(); ++i) {
            query.Or().equal(0, values_to_query[i]);
        }
        TableView results = query.find_all();
        REALM_ASSERT_EX(results.size() == num_queried_matches, results.size(), num_queried_matches,
                        values_to_query.size());
        // Keeps `results` referenced even if the assertion above compiles to nothing.
        static_cast<void>(results);
    }
};

struct BenchmarkQueryChainedOrIntsIndexed : BenchmarkQueryChainedOrInts {
const char* name() const
{
return "QueryChainedOrIntsIndexed";
}
void before_all(SharedGroup& group)
{
BenchmarkQueryChainedOrInts::before_all(group);
WriteTransaction tr(group);
TableRef t = tr.get_table("IntOnly");
t->add_search_index(0);
tr.commit();
}
};


struct BenchmarkQueryIntEquality : BenchmarkQueryChainedOrInts {
const char* name() const
{
return "QueryIntEquality";
}

void operator()(SharedGroup& group)
{
ReadTransaction tr(group);
ConstTableRef table = tr.get_table("IntOnly");
Query query = table->where().equal(0, 0);
TableView results = query.find_all();
REALM_ASSERT_EX(results.size() == 1, results.size(), 1);
static_cast<void>(results);
}
};

struct BenchmarkQueryIntEqualityIndexed : BenchmarkQueryIntEquality {
const char* name() const
{
return "QueryIntEqualityIndexed";
}
void before_all(SharedGroup& group)
{
BenchmarkQueryIntEquality::before_all(group);
WriteTransaction tr(group);
TableRef t = tr.get_table("IntOnly");
t->add_search_index(0);
tr.commit();
}
};

struct BenchmarkQuery : BenchmarkWithStrings {
const char* name() const
{
Expand Down Expand Up @@ -382,7 +472,6 @@ struct BenchmarkQueryChainedOrStrings : BenchmarkWithStringsTable {
TableRef t = tr.get_table("StringOnly");
t->add_empty_row(num_rows);
REALM_ASSERT(num_rows > num_queried_matches);
Random r;
for (size_t i = 0; i < num_rows; ++i) {
std::stringstream ss;
ss << i;
Expand Down Expand Up @@ -1050,6 +1139,10 @@ int benchmark_common_tasks_main()
BENCH(BenchmarkQueryInsensitiveStringIndexed);
BENCH(BenchmarkNonInitatorOpen);
BENCH(BenchmarkQueryChainedOrStrings);
BENCH(BenchmarkQueryChainedOrInts);
BENCH(BenchmarkQueryChainedOrIntsIndexed);
BENCH(BenchmarkQueryIntEquality);
BENCH(BenchmarkQueryIntEqualityIndexed);

#undef BENCH
return 0;
Expand Down
75 changes: 69 additions & 6 deletions test/test_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11876,7 +11876,7 @@ TEST(Query_IntOrQueryOptimisation)
TEST(Query_IntOrQueryPerformance)
{
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::chrono::microseconds;

Group g;
TableRef table = g.add_table("table");
Expand Down Expand Up @@ -11910,12 +11910,12 @@ TEST(Query_IntOrQueryPerformance)
auto before = std::chrono::steady_clock().now();
size_t ints_count = q_ints.count();
auto after = std::chrono::steady_clock().now();
// std::cout << "ints count: " << duration_cast<milliseconds>(after - before).count() << " ms" << std::endl;
// std::cout << "ints count: " << duration_cast<microseconds>(after - before).count() << " us" << std::endl;

before = std::chrono::steady_clock().now();
size_t nullable_ints_count = q_nullables.count();
after = std::chrono::steady_clock().now();
// std::cout << "nullable ints count: " << duration_cast<milliseconds>(after - before).count() << " ms"
// std::cout << "nullable ints count: " << duration_cast<microseconds>(after - before).count() << " us"
// << std::endl;

size_t expected_nullable_query_count =
Expand All @@ -11927,9 +11927,72 @@ TEST(Query_IntOrQueryPerformance)
run_queries(2);
run_queries(2048);

// table->add_search_index(ints_col_ndx);
// table->add_search_index(nullable_ints_col_ndx);
// run_queries();
table->add_search_index(ints_col_ndx);
table->add_search_index(nullable_ints_col_ndx);

run_queries(2);
run_queries(2048);
}

// An equality query on an integer column with a search index must count every matching row.
TEST(Query_IntIndexed)
{
    Group g;
    TableRef table = g.add_table("table");
    auto col_id = table->add_column(type_Int, "id");

    // 100 rows cycling through 0..9, so each value occurs exactly ten times.
    constexpr int row_count = 100;
    constexpr int distinct_values = 10;
    table->add_empty_row(row_count);
    for (int row = 0; row != row_count; ++row) {
        const int value = row % distinct_values;
        table->set_int(col_id, row, value);
    }

    // The index is added after the data is in place.
    table->add_search_index(col_id);

    Query q = table->where().equal(col_id, 1);
    CHECK_EQUAL(q.count(), 10);
}

// Checks an OR of two equality conditions on an indexed integer column whose values were
// written out of row order and where a row was then removed with move_last_over().
TEST(Query_IntIndexedUnordered)
{
Group g;
TableRef table = g.add_table("table");
auto col_id = table->add_column(type_Int, "id");
// The search index is added before any row exists.
table->add_search_index(col_id);
table->add_empty_row(4);
// The rows holding 1 are written in the order 0, 2, 1 rather than in row order.
table->set_int(col_id, 0, 1);
table->set_int(col_id, 2, 1);
table->set_int(col_id, 1, 1);
table->set_int(col_id, 3, 2);
// NOTE(review): presumably removes row 1 by moving the last row (value 2) into its place,
// leaving three rows -- confirm against Table::move_last_over.
table->move_last_over(1);

// Both conditions target the same indexed column; three rows are expected to match in total.
Query q = table->where().equal(col_id, 1) || table->where().equal(col_id, 2);
CHECK_EQUAL(q.count(), 3);
}

// Equality queries must return the same counts with and without a search
// index when the matching rows are spread over more than one leaf.
TEST(Query_IntFindInNextLeaf)
{
    Group g;
    TableRef table = g.add_table("table");
    auto col_id = table->add_column(type_Int, "id");

    // num_misses > MAX_BPNODE_SIZE to check results on other leafs
    constexpr size_t num_misses = 1000 * 2 + 10;
    table->add_empty_row(num_misses);
    size_t row = 0;
    while (row < num_misses) {
        table->set_int(col_id, row, row % 10);
        ++row;
    }

    // One extra row with a value that occurs nowhere else.
    size_t last_row_ndx = table->add_empty_row();
    table->set_int(col_id, last_row_ndx, 20);

    // Every value 0..9 occurs num_misses / 10 times; 20 occurs once.
    auto verify_counts = [&]() {
        size_t needle = 0;
        while (needle < 10) {
            Query qi = table->where().equal(col_id, int64_t(needle));
            CHECK_EQUAL(qi.count(), num_misses / 10);
            ++needle;
        }
        Query q20 = table->where().equal(col_id, 20);
        CHECK_EQUAL(q20.count(), 1);
    };

    // First without the search index, then again once the column is indexed.
    verify_counts();
    table->add_search_index(col_id);
    verify_counts();
}


Expand Down