Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize for 'Less' and 'Greater' query on Timestamp #3344

Merged
merged 8 commits into from
Aug 15, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

### Internals
* The release binaries for Apple platforms are now built with Xcode 9.4 (up from 9.2).
* Performance of `Less`, `Greater`, `LessEqual` and `GreaterEqual` queries on Timestamp columns is improved (#3344).

----------------------------------------------

Expand Down
12 changes: 12 additions & 0 deletions src/realm/column_timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,18 @@ void TimestampColumn::leaf_to_dot(MemRef, ArrayParent*, size_t /*ndx_in_parent*/
// FIXME: Dummy implementation
}

void TimestampColumn::get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf,
BpTree<util::Optional<int64_t>>::LeafInfo& inout_leaf_info) const noexcept
{
m_seconds->get_leaf(ndx, ndx_in_leaf, inout_leaf_info);
}

// Looks up the leaf of the nanoseconds sub-column that holds row `ndx`.
//
// Pure forwarding wrapper around `m_nanoseconds->get_leaf()`; all three
// arguments are passed through unchanged, so `ndx_in_leaf` and `inout_leaf`
// end up holding whatever that call writes into them.
//
// NOTE(review): this definition sits above the `LCOV_EXCL_STOP ignore debug
// functions` marker, so it is presumably excluded from coverage together with
// the debug helpers - confirm that this is intended.
void TimestampColumn::get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf,
BpTree<int64_t>::LeafInfo& inout_leaf) const noexcept
{
m_nanoseconds->get_leaf(ndx, ndx_in_leaf, inout_leaf);
}

// LCOV_EXCL_STOP ignore debug functions

void TimestampColumn::add(const Timestamp& ts)
Expand Down
3 changes: 3 additions & 0 deletions src/realm/column_timestamp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ class TimestampColumn : public ColumnBaseSimple {
void to_dot(std::ostream&, StringData title = StringData()) const override;
void do_dump_node_structure(std::ostream&, int level) const override;
void leaf_to_dot(MemRef, ArrayParent*, size_t ndx_in_parent, std::ostream&) const override;
/// Fetch the leaf of the seconds sub-column (nullable 64-bit integers) that
/// contains row `ndx`.
/// `ndx_in_leaf` and `inout_leaf` are output arguments filled in by the
/// underlying B+-tree lookup; the query engine uses `ndx - ndx_in_leaf` as the
/// row index of the first element of the returned leaf.
void get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf,
BpTree<util::Optional<int64_t>>::LeafInfo& inout_leaf) const noexcept;
/// Fetch the leaf of the nanoseconds sub-column (64-bit integers) that
/// contains row `ndx`.
/// `ndx_in_leaf` and `inout_leaf` are output arguments filled in by the
/// underlying B+-tree lookup; the query engine uses `ndx - ndx_in_leaf` as the
/// row index of the first element of the returned leaf.
void get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree<int64_t>::LeafInfo& inout_leaf) const noexcept;

void add(const Timestamp& ts = Timestamp{});
Timestamp get(size_t row_ndx) const noexcept;
Expand Down
145 changes: 145 additions & 0 deletions src/realm/query_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,3 +684,148 @@ ExpressionNode::ExpressionNode(const ExpressionNode& from, QueryNodeHandoverPatc
, m_expression(from.m_expression->clone(patches))
{
}

namespace realm {
// Specialisation of find_first_local() for the condition `column > m_value`.
//
// Returns the index of the first row in [start, end) that this function
// accepts, or not_found if there is none. A null m_value matches nothing and
// rows whose seconds part is null are skipped.
template <>
size_t TimestampNode<Greater>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    if (m_value.is_null())
        return not_found;

    // m_value is non-null from here on and is not modified during the scan.
    const auto needle_seconds = m_value.get_seconds();
    const auto needle_nanos = m_value.get_nanoseconds();

    while (start < end) {
        // Coarse pass on the seconds part only; GreaterEqual is used so that
        // rows with equal seconds are kept as candidates for the nanosecond check.
        const size_t candidate = this->find_first_local_seconds<GreaterEqual>(start, end);
        if (candidate == not_found)
            break;

        util::Optional<int64_t> row_seconds = get_seconds_and_cache(candidate);
        const bool row_is_null = !row_seconds;
        if (!row_is_null) {
            if (*row_seconds > needle_seconds)
                return candidate;

            // The seconds part did not decide the comparison (the pre-filter is
            // expected to leave only the "equal seconds" case here), so the
            // nanoseconds part breaks the tie.
            const int32_t row_nanos = this->get_nanoseconds_and_cache(candidate);
            if (row_nanos > needle_nanos)
                return candidate;
        }

        // Null row or failed tie-break: resume right after this candidate.
        start = candidate + 1;
    }

    return not_found;
}

// Specialisation of find_first_local() for the condition `column < m_value`.
//
// Returns the index of the first row in [start, end) that this function
// accepts, or not_found if there is none. A null m_value matches nothing and
// rows whose seconds part is null are skipped.
template <>
size_t TimestampNode<Less>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    if (m_value.is_null())
        return not_found;

    // m_value is non-null from here on and is not modified during the scan.
    const auto needle_seconds = m_value.get_seconds();
    const auto needle_nanos = m_value.get_nanoseconds();

    while (start < end) {
        // Coarse pass on the seconds part only; LessEqual is used so that
        // rows with equal seconds are kept as candidates for the nanosecond check.
        const size_t candidate = this->find_first_local_seconds<LessEqual>(start, end);
        if (candidate == not_found)
            break;

        util::Optional<int64_t> row_seconds = get_seconds_and_cache(candidate);
        const bool row_is_null = !row_seconds;
        if (!row_is_null) {
            if (*row_seconds < needle_seconds)
                return candidate;

            // The seconds part did not decide the comparison (the pre-filter is
            // expected to leave only the "equal seconds" case here), so the
            // nanoseconds part breaks the tie.
            const int32_t row_nanos = this->get_nanoseconds_and_cache(candidate);
            if (row_nanos < needle_nanos)
                return candidate;
        }

        // Null row or failed tie-break: resume right after this candidate.
        start = candidate + 1;
    }

    return not_found;
}

// Specialisation of find_first_local() for the condition `column >= m_value`.
//
// Returns the index of the first row in [start, end) that this function
// accepts, or not_found if there is none. A null m_value matches nothing
// (not even null rows) and rows whose seconds part is null are skipped.
template <>
size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    if (m_value.is_null())
        return not_found;

    // m_value is non-null from here on and is not modified during the scan.
    const auto needle_seconds = m_value.get_seconds();
    const auto needle_nanos = m_value.get_nanoseconds();

    while (start < end) {
        // Coarse pass on the seconds part only.
        const size_t candidate = this->find_first_local_seconds<GreaterEqual>(start, end);
        if (candidate == not_found)
            break;

        util::Optional<int64_t> row_seconds = get_seconds_and_cache(candidate);
        const bool row_is_null = !row_seconds;
        if (!row_is_null) {
            if (*row_seconds > needle_seconds)
                return candidate;

            // The seconds part did not decide the comparison (the pre-filter is
            // expected to leave only the "equal seconds" case here), so the
            // nanoseconds part breaks the tie; equality is accepted.
            const int32_t row_nanos = this->get_nanoseconds_and_cache(candidate);
            if (row_nanos >= needle_nanos)
                return candidate;
        }

        // Null row or failed tie-break: resume right after this candidate.
        start = candidate + 1;
    }

    return not_found;
}

// Specialisation of find_first_local() for the condition `column <= m_value`.
//
// Returns the index of the first row in [start, end) that this function
// accepts, or not_found if there is none. A null m_value matches nothing
// (not even null rows) and rows whose seconds part is null are skipped.
template <>
size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end)
{
    REALM_ASSERT(this->m_table);

    if (m_value.is_null())
        return not_found;

    // m_value is non-null from here on and is not modified during the scan.
    const auto needle_seconds = m_value.get_seconds();
    const auto needle_nanos = m_value.get_nanoseconds();

    while (start < end) {
        // Coarse pass on the seconds part only.
        const size_t candidate = this->find_first_local_seconds<LessEqual>(start, end);
        if (candidate == not_found)
            break;

        util::Optional<int64_t> row_seconds = get_seconds_and_cache(candidate);
        const bool row_is_null = !row_seconds;
        if (!row_is_null) {
            if (*row_seconds < needle_seconds)
                return candidate;

            // The seconds part did not decide the comparison (the pre-filter is
            // expected to leave only the "equal seconds" case here), so the
            // nanoseconds part breaks the tie; equality is accepted.
            const int32_t row_nanos = this->get_nanoseconds_and_cache(candidate);
            if (row_nanos <= needle_nanos)
                return candidate;
        }

        // Null row or failed tie-break: resume right after this candidate.
        start = candidate + 1;
    }

    return not_found;
}
#ifdef _WIN32
// Explicit instantiation of the four specialised find_first_local() members,
// reported as required on some Windows builds.
// NOTE(review): each of these members is already explicitly specialised above,
// and per the C++ standard an explicit instantiation that follows an explicit
// specialisation has no effect. This block therefore presumably works around a
// toolchain-specific issue - confirm which compiler/version still needs it.
template size_t TimestampNode<Greater>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<Less>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
template size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);
#endif
} // namespace realm
147 changes: 130 additions & 17 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,20 +1283,24 @@ class BinaryNode : public ParentNode {
};


template <class TConditionFunction>
class TimestampNode : public ParentNode {
class TimestampNodeBase : public ParentNode {
public:
using TConditionValue = Timestamp;
static const bool special_null_node = false;
using LeafTypeSeconds = typename IntNullColumn::LeafType;
using LeafInfoSeconds = typename IntNullColumn::LeafInfo;
using LeafTypeNanos = typename IntegerColumn::LeafType;
using LeafInfoNanos = typename IntegerColumn::LeafInfo;


TimestampNode(Timestamp v, size_t column)
TimestampNodeBase(Timestamp v, size_t column)
: m_value(v)
{
m_condition_column_idx = column;
}

TimestampNode(null, size_t column)
: TimestampNode(Timestamp{}, column)
TimestampNodeBase(null, size_t column)
: TimestampNodeBase(Timestamp{}, column)
{
}

Expand All @@ -1315,27 +1319,54 @@ class TimestampNode : public ParentNode {
ParentNode::init();

m_dD = 100.0;

// Clear leaf cache
m_leaf_end_seconds = 0;
m_array_ptr_seconds.reset(); // Explicitly destroy the old one first, because we're reusing the memory.
m_array_ptr_seconds.reset(new (&m_leaf_cache_storage_seconds) LeafTypeSeconds(m_table->get_alloc()));
m_leaf_end_nanos = 0;
m_array_ptr_nanos.reset(); // Explicitly destroy the old one first, because we're reusing the memory.
m_array_ptr_nanos.reset(new (&m_leaf_cache_storage_nanos) LeafTypeNanos(m_table->get_alloc()));
}

size_t find_first_local(size_t start, size_t end) override
protected:
void get_leaf_seconds(const TimestampColumn& col, size_t ndx)
{
size_t ret = m_condition_column->find<TConditionFunction>(m_value, start, end);
return ret;
size_t ndx_in_leaf;
LeafInfoSeconds leaf_info_seconds{&m_leaf_ptr_seconds, m_array_ptr_seconds.get()};
col.get_seconds_leaf(ndx, ndx_in_leaf, leaf_info_seconds);
m_leaf_start_seconds = ndx - ndx_in_leaf;
m_leaf_end_seconds = m_leaf_start_seconds + m_leaf_ptr_seconds->size();
}

virtual std::string describe(util::serializer::SerialisationState& state) const override
void get_leaf_nanos(const TimestampColumn& col, size_t ndx)
{
REALM_ASSERT(m_condition_column != nullptr);
return state.describe_column(ParentNode::m_table, m_condition_column->get_column_index())
+ " " + TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value);
size_t ndx_in_leaf;
LeafInfoNanos leaf_info_nanos{&m_leaf_ptr_nanos, m_array_ptr_nanos.get()};
col.get_nanoseconds_leaf(ndx, ndx_in_leaf, leaf_info_nanos);
m_leaf_start_nanos = ndx - ndx_in_leaf;
m_leaf_end_nanos = m_leaf_start_nanos + m_leaf_ptr_nanos->size();
}

std::unique_ptr<ParentNode> clone(QueryNodeHandoverPatches* patches) const override
util::Optional<int64_t> get_seconds_and_cache(size_t ndx)
{
return std::unique_ptr<ParentNode>(new TimestampNode(*this, patches));
// Cache internal leaves
if (ndx >= this->m_leaf_end_seconds || ndx < this->m_leaf_start_seconds) {
this->get_leaf_seconds(*this->m_condition_column, ndx);
}
return this->m_leaf_ptr_seconds->get(ndx - this->m_leaf_start_seconds);
}

int32_t get_nanoseconds_and_cache(size_t ndx)
{
// Cache internal leaves
if (ndx >= this->m_leaf_end_nanos || ndx < this->m_leaf_start_nanos) {
this->get_leaf_nanos(*this->m_condition_column, ndx);
}
return int32_t(this->m_leaf_ptr_nanos->get(ndx - this->m_leaf_start_nanos));
}

TimestampNode(const TimestampNode& from, QueryNodeHandoverPatches* patches)
TimestampNodeBase(const TimestampNodeBase& from, QueryNodeHandoverPatches* patches)
: ParentNode(from, patches)
, m_value(from.m_value)
, m_condition_column(from.m_condition_column)
Expand All @@ -1344,11 +1375,94 @@ class TimestampNode : public ParentNode {
m_condition_column_idx = m_condition_column->get_column_index();
}

private:
Timestamp m_value;
const TimestampColumn* m_condition_column;

// Leaf cache seconds
using LeafCacheStorageSeconds =
typename std::aligned_storage<sizeof(LeafTypeSeconds), alignof(LeafTypeSeconds)>::type;
LeafCacheStorageSeconds m_leaf_cache_storage_seconds;
std::unique_ptr<LeafTypeSeconds, PlacementDelete> m_array_ptr_seconds;
const LeafTypeSeconds* m_leaf_ptr_seconds = nullptr;
size_t m_leaf_start_seconds = npos;
size_t m_leaf_end_seconds = 0;

// Leaf cache nanoseconds
using LeafCacheStorageNanos = typename std::aligned_storage<sizeof(LeafTypeNanos), alignof(LeafTypeNanos)>::type;
LeafCacheStorageNanos m_leaf_cache_storage_nanos;
std::unique_ptr<LeafTypeNanos, PlacementDelete> m_array_ptr_nanos;
const LeafTypeNanos* m_leaf_ptr_nanos = nullptr;
size_t m_leaf_start_nanos = npos;
size_t m_leaf_end_nanos = 0;
};

// Query node that compares a Timestamp column against the constant m_value
// using TConditionFunction. State and leaf caches come from TimestampNodeBase.
// find_first_local() has dedicated specialisations for some conditions; see
// query_engine.cpp.
template <class TConditionFunction>
class TimestampNode : public TimestampNodeBase {
public:
    using TimestampNodeBase::TimestampNodeBase;

    // Searches only the seconds part of the column, one leaf at a time.
    //
    // Returns the absolute row index of the first row in [start, end) for which
    // the leaf's find_first<Condition>() reports a hit against the seconds of
    // m_value, or not_found. m_value must not be null.
    template <class Condition>
    size_t find_first_local_seconds(size_t start, size_t end)
    {
        REALM_ASSERT(!this->m_value.is_null());
        while (start < end) {
            // Load the leaf containing `start` unless it is already cached.
            const bool in_cached_leaf =
                start >= this->m_leaf_start_seconds && start < this->m_leaf_end_seconds;
            if (!in_cached_leaf)
                this->get_leaf_seconds(*this->m_condition_column, start);

            const size_t leaf_begin = this->m_leaf_start_seconds;
            const size_t leaf_end = this->m_leaf_end_seconds;

            // Clamp the search window to the current leaf and make it leaf-relative.
            const size_t local_end = (end > leaf_end ? leaf_end : end) - leaf_begin;

            const int64_t needle = this->m_value.get_seconds();
            const size_t hit = this->m_leaf_ptr_seconds->template find_first<Condition>(
                needle, start - leaf_begin, local_end);
            if (hit != not_found)
                return hit + leaf_begin; // translate back to a column row index

            // Nothing in this leaf; continue with the next one.
            start = leaf_end;
        }
        return not_found;
    }

    // Generic implementation: delegates the whole comparison to the column.
    // See query_engine.cpp for operator specialisations.
    size_t find_first_local(size_t start, size_t end) override
    {
        REALM_ASSERT(this->m_table);

        return m_condition_column->find<TConditionFunction>(m_value, start, end);
    }

    // Renders the node as "<column> <condition> <value>".
    virtual std::string describe(util::serializer::SerialisationState& state) const override
    {
        REALM_ASSERT(m_condition_column != nullptr);
        return state.describe_column(ParentNode::m_table, m_condition_column->get_column_index()) + " " +
               TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value);
    }

    // Creates a copy of this node through the handover copy constructor.
    std::unique_ptr<ParentNode> clone(QueryNodeHandoverPatches* patches) const override
    {
        std::unique_ptr<ParentNode> copy(new TimestampNode(*this, patches));
        return copy;
    }
};

// Declarations of the explicit specialisations of find_first_local() for the
// ordering conditions; the definitions live in query_engine.cpp.
// They are declared here so that code including this header does not
// implicitly instantiate the generic find_first_local() for these conditions.
template <>
size_t TimestampNode<Greater>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<Less>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<GreaterEqual>::find_first_local(size_t start, size_t end);
template <>
size_t TimestampNode<LessEqual>::find_first_local(size_t start, size_t end);



class StringNodeBase : public ParentNode {
public:
using TConditionValue = StringData;
Expand Down Expand Up @@ -2248,7 +2362,6 @@ class TwoColumnsNode : public ParentNode {

// For Next-Generation expressions like col1 / col2 + 123 > col4 * 100.
class ExpressionNode : public ParentNode {

public:
ExpressionNode(std::unique_ptr<Expression>);

Expand Down
Loading