Skip to content

Commit

Permalink
Optimize timestamp queries: ==, !=, !NULL
Browse files Browse the repository at this point in the history
  • Loading branch information
James Stone committed Aug 15, 2019
1 parent 6bd8de6 commit cec3647
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 13 deletions.
86 changes: 86 additions & 0 deletions src/realm/query_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -821,11 +821,97 @@ size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end)

return not_found;
}

// Specialisation for the `==` condition.
// Returns the index of the first row in [start, end) whose timestamp equals
// m_value, or not_found when no such row exists.
template <>
size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    // Null needle: only the seconds search is needed, and a column that is
    // not nullable cannot produce a match at all.
    if (m_value.is_null()) {
        if (REALM_UNLIKELY(!m_condition_column_is_nullable)) {
            return not_found;
        }
        return this->find_first_local_seconds<Equal>(start, end);
    }

    // Non-null needle: repeatedly look for a row whose seconds part matches,
    // then confirm the candidate by comparing the nanoseconds part.
    size_t cursor = start;
    while (cursor < end) {
        const size_t candidate = this->find_first_local_seconds<Equal>(cursor, end);
        if (candidate == not_found) {
            break;
        }

        // The seconds part matched and the needle is not null, so the
        // nanoseconds part is the only thing left to compare.
        if (this->get_nanoseconds_and_cache(candidate) == m_value.get_nanoseconds()) {
            return candidate;
        }

        // Seconds matched but nanoseconds did not: resume after this row.
        cursor = candidate + 1;
    }

    return not_found;
}

// Specialisation for the `!=` condition.
// Returns the index of the first row in [start, end) whose timestamp differs
// from m_value (a null row differs from a non-null needle and vice versa),
// or not_found when no such row exists.
template <>
size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    // Empty range: nothing can match. Returning here also keeps the
    // unconditional `++start` below from stepping past `end`, or wrapping
    // around to 0 when `start` is the largest size_t value.
    if (start >= end) {
        return not_found;
    }

    // In many scenarios the first item is likely to be not equal, so do a
    // quick check of it before falling back to the general scans below.
    {
        util::Optional<int64_t> seconds = get_seconds_and_cache(start);
        // The nanoseconds comparison is only evaluated when the seconds are
        // equal and non-null, i.e. when the needle itself is non-null.
        if (seconds != m_needle_seconds
            || (seconds && this->get_nanoseconds_and_cache(start) != m_value.get_nanoseconds())) {
            return start;
        }
    }

    // The first row was equal to the needle; continue with the next one.
    ++start;

    if (m_value.is_null()) {
        if (REALM_UNLIKELY(!m_condition_column_is_nullable)) {
            return not_found;
        }
        // Null needle: the remaining matches are the rows found by the
        // NotNull search over the seconds part.
        return this->find_first_local_seconds<NotNull>(start, end);
    }

    const int64_t needle_seconds = m_value.get_seconds();
    while (start < end) {
        util::Optional<int64_t> seconds = get_seconds_and_cache(start);
        if (!seconds || *seconds != needle_seconds) {
            return start;
        }
        // Neither m_value nor the current value is null and the seconds
        // parts are equal; only the nanoseconds part remains to be compared.
        int32_t nanos = this->get_nanoseconds_and_cache(start);
        if (nanos != m_value.get_nanoseconds()) {
            return start;
        }
        ++start;
    }

    return not_found;
}

// Specialisation for the "is not null" condition.
// Returns the index of the first non-null row in [start, end), or not_found
// when there is none (including when the range is empty).
template <>
size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);
    if (REALM_UNLIKELY(!m_condition_column_is_nullable)) {
        // All rows are non-null, so the first row of the range matches.
        // An empty range has no first row and must not report `start`.
        return start < end ? start : not_found;
    }
    return this->find_first_local_seconds<NotNull>(start, end);
}


#ifdef _WIN32
// Explicit instantiation required on some windows builds.
// One line per condition type; the Equal, NotEqual and NotNull entries
// correspond to the find_first_local specialisations defined above.
template size_t TimestampNode<Greater>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<Less>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end);
#endif
} // namespace realm
21 changes: 15 additions & 6 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,7 @@ class TimestampNodeBase : public ParentNode {

// Creates a node that compares a timestamp column against the needle `v`.
// `column` is stored as the index of the condition column.
TimestampNodeBase(Timestamp v, size_t column)
: m_value(v)
// Cache the seconds part of the needle once: none for a null needle, otherwise
// its seconds value. This initializer reads m_value, which is set just above.
, m_needle_seconds(m_value.is_null() ? util::none : util::make_optional(m_value.get_seconds()))
{
m_condition_column_idx = column;
}
Expand Down Expand Up @@ -1327,6 +1328,7 @@ class TimestampNodeBase : public ParentNode {
m_leaf_end_nanos = 0;
m_array_ptr_nanos.reset(); // Explicitly destroy the old one first, because we're reusing the memory.
m_array_ptr_nanos.reset(new (&m_leaf_cache_storage_nanos) LeafTypeNanos(m_table->get_alloc()));
m_condition_column_is_nullable = m_condition_column->is_nullable();
}

protected:
Expand Down Expand Up @@ -1354,7 +1356,8 @@ class TimestampNodeBase : public ParentNode {
if (ndx >= this->m_leaf_end_seconds || ndx < this->m_leaf_start_seconds) {
this->get_leaf_seconds(*this->m_condition_column, ndx);
}
return this->m_leaf_ptr_seconds->get(ndx - this->m_leaf_start_seconds);
const size_t ndx_in_leaf = ndx - m_leaf_start_seconds;
return this->m_leaf_ptr_seconds->get(ndx_in_leaf);
}

int32_t get_nanoseconds_and_cache(size_t ndx)
Expand All @@ -1369,14 +1372,18 @@ class TimestampNodeBase : public ParentNode {
// Copy-style constructor that also receives an optional `patches` pointer.
// Copies the needle, its cached seconds part, the column pointer and the
// cached nullability flag from `from`; `from` and `patches` are forwarded to
// the ParentNode base.
TimestampNodeBase(const TimestampNodeBase& from, QueryNodeHandoverPatches* patches)
: ParentNode(from, patches)
, m_value(from.m_value)
, m_needle_seconds(from.m_needle_seconds)
, m_condition_column(from.m_condition_column)
, m_condition_column_is_nullable(from.m_condition_column_is_nullable)
{
// Only when both a column and a patches object are present: record the
// column by the index the column itself reports.
if (m_condition_column && patches)
m_condition_column_idx = m_condition_column->get_column_index();
}

Timestamp m_value;
util::Optional<int64_t> m_needle_seconds;
const TimestampColumn* m_condition_column;
bool m_condition_column_is_nullable = false;

// Leaf cache seconds
using LeafCacheStorageSeconds =
Expand Down Expand Up @@ -1404,7 +1411,6 @@ class TimestampNode : public TimestampNodeBase {
template <class Condition>
size_t find_first_local_seconds(size_t start, size_t end)
{
REALM_ASSERT(!this->m_value.is_null());
while (start < end) {
// Cache internal leaves
if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) {
Expand All @@ -1417,9 +1423,8 @@ class TimestampNode : public TimestampNodeBase {
else
end2 = end - this->m_leaf_start_seconds;

int64_t needle = this->m_value.get_seconds();
size_t s = this->m_leaf_ptr_seconds->template find_first<Condition>(
needle, start - this->m_leaf_start_seconds, end2);
m_needle_seconds, start - this->m_leaf_start_seconds, end2);

if (s == not_found) {
start = this->m_leaf_end_seconds;
Expand Down Expand Up @@ -1460,8 +1465,12 @@ template <>
size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);


// Declarations of the explicit specialisations of find_first_local for the
// Equal, NotEqual and NotNull conditions.
template <>
size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end);

class StringNodeBase : public ParentNode {
public:
Expand Down
118 changes: 111 additions & 7 deletions test/benchmark-common-tasks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,18 +325,25 @@ struct BenchmarkWithTimestamps : Benchmark {
std::multiset<Timestamp> values;
Timestamp needle;
size_t num_results_to_needle;
size_t num_nulls_added = 0;
double percent_results_to_needle = 0.5;
double percent_chance_of_null = 0.0;
void before_all(SharedGroup& group)
{
WriteTransaction tr(group);
TableRef t = tr.add_table("Timestamps");
t->add_column(type_Timestamp, "timestamps");
t->add_column(type_Timestamp, "timestamps", true);
t->add_empty_row(BASE_SIZE * 10);
Random r;
for (size_t i = 0; i < BASE_SIZE * 10; ++i) {
Timestamp time{r.draw_int<int64_t>(0, 1000000), r.draw_int<int32_t>(0, 1000000)};
if (r.draw_int<int64_t>(0, 100) / 100.0 < percent_chance_of_null) {
time = Timestamp{};
++num_nulls_added;
} else {
values.insert(time);
}
t->set_timestamp(0, i, time);
values.insert(time);
}
tr.commit();
// simulate a work load where this percent of random results match
Expand All @@ -359,7 +366,8 @@ struct BenchmarkWithTimestamps : Benchmark {

struct BenchmarkQueryTimestampGreater : BenchmarkWithTimestamps {
void before_all(SharedGroup& group) {
percent_results_to_needle = 2.0f / 3.0f;
percent_chance_of_null = 0.10f;
percent_results_to_needle = 0.80f;
BenchmarkWithTimestamps::before_all(group);
}
const char* name() const
Expand All @@ -380,7 +388,8 @@ struct BenchmarkQueryTimestampGreater : BenchmarkWithTimestamps {

struct BenchmarkQueryTimestampGreaterEqual : BenchmarkWithTimestamps {
void before_all(SharedGroup& group) {
percent_results_to_needle = 2.0f / 3.0f;
percent_chance_of_null = 0.10f;
percent_results_to_needle = 0.80f;
BenchmarkWithTimestamps::before_all(group);
}
const char* name() const
Expand All @@ -402,7 +411,8 @@ struct BenchmarkQueryTimestampGreaterEqual : BenchmarkWithTimestamps {

struct BenchmarkQueryTimestampLess : BenchmarkWithTimestamps {
void before_all(SharedGroup& group) {
percent_results_to_needle = 1.0f / 3.0f;
percent_chance_of_null = 0.10f;
percent_results_to_needle = 0.20f;
BenchmarkWithTimestamps::before_all(group);
}
const char* name() const
Expand All @@ -423,7 +433,8 @@ struct BenchmarkQueryTimestampLess : BenchmarkWithTimestamps {

struct BenchmarkQueryTimestampLessEqual : BenchmarkWithTimestamps {
void before_all(SharedGroup& group) {
percent_results_to_needle = 1.0f / 3.0f;
percent_chance_of_null = 0.10f;
percent_results_to_needle = 0.20f;
BenchmarkWithTimestamps::before_all(group);
}
const char* name() const
Expand All @@ -442,6 +453,96 @@ struct BenchmarkQueryTimestampLessEqual : BenchmarkWithTimestamps {
}
};


// Benchmark for `timestamp == needle` queries on the shared timestamp table.
struct BenchmarkQueryTimestampEqual : BenchmarkWithTimestamps {
    // Tunes the fixture parameters, then lets the base class build the table.
    void before_all(SharedGroup& group)
    {
        percent_results_to_needle = 0.33f;
        percent_chance_of_null = 0.10f;
        BenchmarkWithTimestamps::before_all(group);
    }

    const char* name() const
    {
        return "QueryTimestampEqual";
    }

    // Runs the equality query and checks the hit count against the number of
    // times the needle occurs in the recorded values.
    void operator()(SharedGroup& group)
    {
        ReadTransaction read_tr(group);
        ConstTableRef timestamps = read_tr.get_table("Timestamps");
        Query query = timestamps->where().equal(0, needle);
        TableView results = query.find_all();
        REALM_ASSERT_EX(results.size() == values.count(needle), results.size(), num_results_to_needle, values.count(needle), values.size());
        static_cast<void>(results);
    }
};

// Benchmark for `timestamp != needle` queries on the shared timestamp table.
struct BenchmarkQueryTimestampNotEqual : BenchmarkWithTimestamps {
    // Tunes the fixture parameters, then lets the base class build the table.
    void before_all(SharedGroup& group)
    {
        percent_chance_of_null = 0.60f;
        percent_results_to_needle = 0.10f;
        BenchmarkWithTimestamps::before_all(group);
    }

    const char* name() const
    {
        return "QueryTimestampNotEqual";
    }

    // Runs the inequality query. Expected hits are every recorded non-null
    // value that is not the needle, plus every null row that was added.
    void operator()(SharedGroup& group)
    {
        ReadTransaction tr(group);
        ConstTableRef table = tr.get_table("Timestamps");
        Query query = table->where().not_equal(0, needle);
        TableView results = query.find_all();
        // num_nulls_added is part of the checked expression, so it is
        // reported alongside the other operands when the assertion fails.
        REALM_ASSERT_EX(results.size() == values.size() - values.count(needle) + num_nulls_added, results.size(), values.size(), values.count(needle), num_nulls_added);
        static_cast<void>(results);
    }
};

// Benchmark for `timestamp != null` queries on the shared timestamp table.
struct BenchmarkQueryTimestampNotNull : BenchmarkWithTimestamps {
    // Tunes the fixture parameters, lets the base class build the table, and
    // afterwards replaces the needle with a null timestamp.
    void before_all(SharedGroup& group)
    {
        percent_results_to_needle = 0.0;
        percent_chance_of_null = 0.60f;
        BenchmarkWithTimestamps::before_all(group);
        needle = Timestamp{};
    }

    const char* name() const
    {
        return "QueryTimestampNotNull";
    }

    // Runs the not-null query; the hit count must equal the number of
    // recorded (non-null) values.
    void operator()(SharedGroup& group)
    {
        ReadTransaction read_tr(group);
        ConstTableRef timestamps = read_tr.get_table("Timestamps");
        Query query = timestamps->where().not_equal(0, realm::null());
        TableView results = query.find_all();
        REALM_ASSERT_EX(results.size() == values.size(), results.size(), num_nulls_added, num_results_to_needle, values.size());
        static_cast<void>(results);
    }
};

// Benchmark for `timestamp == null` queries on the shared timestamp table.
struct BenchmarkQueryTimestampEqualNull : BenchmarkWithTimestamps {
    // Tunes the fixture parameters, lets the base class build the table, and
    // afterwards replaces the needle with a null timestamp.
    void before_all(SharedGroup& group)
    {
        percent_results_to_needle = 0.0;
        percent_chance_of_null = 0.10;
        BenchmarkWithTimestamps::before_all(group);
        needle = Timestamp{};
    }

    const char* name() const
    {
        return "QueryTimestampEqualNull";
    }

    // Runs the is-null query; the hit count must equal the number of null
    // rows the fixture added.
    void operator()(SharedGroup& group)
    {
        ReadTransaction read_tr(group);
        ConstTableRef timestamps = read_tr.get_table("Timestamps");
        Query query = timestamps->where().equal(0, realm::null());
        TableView results = query.find_all();
        REALM_ASSERT_EX(results.size() == num_nulls_added, results.size(), num_nulls_added, values.size());
        static_cast<void>(results);
    }
};

struct BenchmarkWithIntsTable : Benchmark {
void before_all(SharedGroup& group)
{
Expand Down Expand Up @@ -1265,11 +1366,14 @@ int benchmark_common_tasks_main()
BENCH(BenchmarkQueryChainedOrIntsIndexed);
BENCH(BenchmarkQueryIntEquality);
BENCH(BenchmarkQueryIntEqualityIndexed);

BENCH(BenchmarkQueryTimestampGreater);
BENCH(BenchmarkQueryTimestampGreaterEqual);
BENCH(BenchmarkQueryTimestampLess);
BENCH(BenchmarkQueryTimestampLessEqual);
BENCH(BenchmarkQueryTimestampEqual);
BENCH(BenchmarkQueryTimestampNotEqual);
BENCH(BenchmarkQueryTimestampNotNull);
BENCH(BenchmarkQueryTimestampEqualNull);

#undef BENCH
return 0;
Expand Down

0 comments on commit cec3647

Please sign in to comment.