Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize timestamp queries: ==, !=, !NULL #3351

Merged
merged 3 commits into from
Aug 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions src/realm/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2219,17 +2219,38 @@ bool Array::find_optimized(int64_t value, size_t start, size_t end, size_t basei
end = nullable_array ? size() - 1 : size();

if (nullable_array) {
// We were called by find() of a nullable array. So skip first entry, take nulls in count, etc, etc. Fixme:
// Huge speed optimizations are possible here! This is a very simple generic method.
for (; start2 < end; start2++) {
int64_t v = get<bitwidth>(start2 + 1);
if (c(v, value, v == get(0), find_null)) {
util::Optional<int64_t> v2(v == get(0) ? util::none : util::make_optional(v));
if (!find_action<action, Callback>(start2 + baseindex, v2, state, callback))
return false; // tell caller to stop aggregating/search
if (std::is_same<cond, Equal>::value) {
// In case of Equal it is safe to use the optimized logic. We just have to fetch the null value
// if this is what we are looking for. And we have to adjust the indexes to compensate for the
// null value at position 0.
if (find_null) {
value = get(0);
}
else {
// If the value to search for is equal to the null value, the value cannot be in the array
if (value == get(0)) {
return true;
}
}
start2++;
end++;
baseindex--;
}
else {
// We were called by find() of a nullable array. So skip first entry, take nulls in count, etc, etc. Fixme:
// Huge speed optimizations are possible here! This is a very simple generic method.
auto null_value = get(0);
for (; start2 < end; start2++) {
int64_t v = get<bitwidth>(start2 + 1);
bool value_is_null = (v == null_value);
if (c(v, value, value_is_null, find_null)) {
util::Optional<int64_t> v2(value_is_null ? util::none : util::make_optional(v));
if (!find_action<action, Callback>(start2 + baseindex, v2, state, callback))
return false; // tell caller to stop aggregating/search
}
}
return true; // tell caller to continue aggregating/search (on next array leafs)
}
return true; // tell caller to continue aggregating/search (on next array leafs)
}


Expand Down
90 changes: 84 additions & 6 deletions src/realm/query_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,9 +759,6 @@ size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end)
{
REALM_ASSERT(this->m_table);

if (this->m_value.is_null()) {
return not_found;
}
while (start < end) {
size_t ret = this->find_first_local_seconds<GreaterEqual>(start, end);

Expand All @@ -770,6 +767,9 @@ size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end)

util::Optional<int64_t> seconds = get_seconds_and_cache(ret);
if (!seconds) { // null equality
if (this->m_value.is_null()) {
return ret;
}
start = ret + 1;
continue;
}
Expand All @@ -793,9 +793,6 @@ size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end)
{
REALM_ASSERT(this->m_table);

if (this->m_value.is_null()) {
return not_found;
}
while (start < end) {
size_t ret = this->find_first_local_seconds<LessEqual>(start, end);

Expand All @@ -804,6 +801,9 @@ size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end)

util::Optional<int64_t> seconds = get_seconds_and_cache(ret);
if (!seconds) { // null equality
if (this->m_value.is_null()) {
return ret;
}
start = ret + 1;
continue;
}
Expand All @@ -821,11 +821,89 @@ size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end)

return not_found;
}

// Equal specialization: returns the index of the first row in [start, end) whose
// timestamp equals m_value, or not_found when there is none.
template <>
size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    const bool needle_is_null = m_value.is_null();

    // A null needle is reported as unmatched when the column is not nullable.
    if (needle_is_null && REALM_UNLIKELY(!m_condition_column_is_nullable))
        return not_found;

    // For a null needle the seconds-level search result is the final answer;
    // the nanoseconds part is not consulted.
    if (needle_is_null)
        return this->find_first_local_seconds<Equal>(start, end);

    size_t pos = start;
    while (pos < end) {
        pos = this->find_first_local_seconds<Equal>(pos, end);
        if (pos == not_found)
            break;

        // Needle and row are both non-null here and their seconds parts agree,
        // so the nanoseconds part decides whether this row is a match.
        const int32_t row_nanos = this->get_nanoseconds_and_cache(pos);
        if (row_nanos == m_value.get_nanoseconds())
            return pos;

        ++pos; // resume the seconds search right after the rejected row
    }

    return not_found;
}

// NotEqual specialization: returns the index of the first row in [start, end) whose
// timestamp differs from m_value, or not_found when there is none.
//
// Null needle:
//   - nullable column: delegates to the NotNull seconds search, i.e. the first
//     non-null row matches;
//   - non-nullable column: every row is non-null, so by the same rule the first row of
//     a non-empty range matches (this mirrors the NotNull specialization).
// Non-null needle: null rows are skipped; a row matches when either its seconds or its
// nanoseconds part differs from the needle.
template <>
size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    if (m_value.is_null()) {
        if (REALM_UNLIKELY(!m_condition_column_is_nullable)) {
            // No row can be null, hence every row is "!= null". Guard against an empty
            // range so that an out-of-range index is never reported as a match.
            return start < end ? start : not_found;
        }
        return this->find_first_local_seconds<NotNull>(start, end);
    }

    // m_value is known to be non-null from here on, so both parts can be read up front.
    const int64_t needle_seconds = m_value.get_seconds();
    const int32_t needle_nanos = m_value.get_nanoseconds();

    for (; start < end; ++start) {
        const util::Optional<int64_t> seconds = get_seconds_and_cache(start);

        // Null value does not match.
        // NOTE(review): confirm this agrees with how the generic NotEqual condition
        // treats "null vs. non-null" elsewhere in the query engine.
        if (!seconds)
            continue;

        if (*seconds != needle_seconds)
            return start;

        // Neither m_value nor the current value is null and the seconds parts are equal;
        // only the nanoseconds part is left to compare.
        if (this->get_nanoseconds_and_cache(start) != needle_nanos)
            return start;
    }

    return not_found;
}

// NotNull specialization: returns the index of the first non-null row in [start, end),
// or not_found when there is none.
template <>
size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);
    if (REALM_UNLIKELY(!m_condition_column_is_nullable)) {
        // All rows are non-null, so the first row of the range is the answer. An empty
        // range must still yield not_found instead of an out-of-range index.
        return start < end ? start : not_found;
    }
    return this->find_first_local_seconds<NotNull>(start, end);
}


#ifdef _WIN32
// Windows-only: explicitly instantiate TimestampNode<Cond>::find_first_local for each
// condition listed below. Explicit instantiation is required on some Windows builds.
// When a new specialization of find_first_local is added to this file, add a matching
// line here as well.
template size_t TimestampNode<Greater>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<Less>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end);
#endif
} // namespace realm
21 changes: 15 additions & 6 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,7 @@ class TimestampNodeBase : public ParentNode {

// Creates a node that compares the timestamp column at index `column` against `v`.
//
// The seconds part of the needle is extracted once here (none when `v` is null) so the
// leaf searches do not have to recompute it. m_condition_column is explicitly set to
// nullptr: the copy constructor reads and null-tests that pointer, so it must never be
// left indeterminate.
TimestampNodeBase(Timestamp v, size_t column)
    : m_value(v)
    , m_needle_seconds(m_value.is_null() ? util::none : util::make_optional(m_value.get_seconds()))
    , m_condition_column(nullptr)
{
    m_condition_column_idx = column;
}
Expand Down Expand Up @@ -1327,6 +1328,7 @@ class TimestampNodeBase : public ParentNode {
m_leaf_end_nanos = 0;
m_array_ptr_nanos.reset(); // Explicitly destroy the old one first, because we're reusing the memory.
m_array_ptr_nanos.reset(new (&m_leaf_cache_storage_nanos) LeafTypeNanos(m_table->get_alloc()));
m_condition_column_is_nullable = m_condition_column->is_nullable();
}

protected:
Expand Down Expand Up @@ -1354,7 +1356,8 @@ class TimestampNodeBase : public ParentNode {
if (ndx >= this->m_leaf_end_seconds || ndx < this->m_leaf_start_seconds) {
this->get_leaf_seconds(*this->m_condition_column, ndx);
}
return this->m_leaf_ptr_seconds->get(ndx - this->m_leaf_start_seconds);
const size_t ndx_in_leaf = ndx - m_leaf_start_seconds;
return this->m_leaf_ptr_seconds->get(ndx_in_leaf);
}

int32_t get_nanoseconds_and_cache(size_t ndx)
Expand All @@ -1369,14 +1372,18 @@ class TimestampNodeBase : public ParentNode {
// Copy constructor used when a node is cloned, optionally with handover patches.
// Copies the needle, its cached seconds part, the column pointer and the cached
// nullability flag from `from`. When patches are supplied and a column is attached,
// the column index is re-read from that column.
TimestampNodeBase(const TimestampNodeBase& from, QueryNodeHandoverPatches* patches)
    : ParentNode(from, patches)
    , m_value(from.m_value)
    , m_needle_seconds(from.m_needle_seconds)
    , m_condition_column(from.m_condition_column)
    , m_condition_column_is_nullable(from.m_condition_column_is_nullable)
{
    const bool refresh_column_index = (patches != nullptr) && (m_condition_column != nullptr);
    if (refresh_column_index) {
        m_condition_column_idx = m_condition_column->get_column_index();
    }
}

// Value the column is compared against (the needle), as passed to the constructor.
Timestamp m_value;
// Seconds part of m_value, or none when m_value is null; computed once at construction
// and handed to the leaf-level seconds search.
util::Optional<int64_t> m_needle_seconds;
// Column being searched; the leaf-caching helpers read their leaves from it.
const TimestampColumn* m_condition_column;
// Cached result of m_condition_column->is_nullable().
bool m_condition_column_is_nullable = false;

// Leaf cache seconds
using LeafCacheStorageSeconds =
Expand Down Expand Up @@ -1404,7 +1411,6 @@ class TimestampNode : public TimestampNodeBase {
template <class Condition>
size_t find_first_local_seconds(size_t start, size_t end)
{
REALM_ASSERT(!this->m_value.is_null());
while (start < end) {
// Cache internal leaves
if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) {
Expand All @@ -1417,9 +1423,8 @@ class TimestampNode : public TimestampNodeBase {
else
end2 = end - this->m_leaf_start_seconds;

int64_t needle = this->m_value.get_seconds();
size_t s = this->m_leaf_ptr_seconds->template find_first<Condition>(
needle, start - this->m_leaf_start_seconds, end2);
m_needle_seconds, start - this->m_leaf_start_seconds, end2);

if (s == not_found) {
start = this->m_leaf_end_seconds;
Expand Down Expand Up @@ -1460,8 +1465,12 @@ template <>
size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
// Declarations of the explicit specializations of TimestampNode<Cond>::find_first_local.
// Each takes the half-open row range [start, end) to search.
template <>
size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);


template <>
size_t TimestampNode<Equal>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<NotEqual>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<NotNull>::find_first_local(size_t start, size_t end);

class StringNodeBase : public ParentNode {
public:
Expand Down
Loading