Skip to content

Commit

Permalink
Optimize for 'Less' and 'Greater' query on Timestamp
Browse files Browse the repository at this point in the history
  • Loading branch information
jedelbo committed Aug 12, 2019
1 parent dfd5baa commit 21ac6c4
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 6 deletions.
5 changes: 5 additions & 0 deletions src/realm/array_basic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@ class BasicArray : public Array {
void truncate(size_t size);
void clear();

template <class Cond>
size_t find_first(T value, size_t begin = 0, size_t end = npos) const;
// Non-template convenience overload: callers that do not name a condition
// get the Equal condition by forwarding to find_first<Equal>.
size_t find_first(T value, size_t begin = 0, size_t end = npos) const
{
    return this->template find_first<Equal>(value, begin, end);
}
void find_all(IntegerColumn* result, T value, size_t add_offset = 0, size_t begin = 0, size_t end = npos) const;

size_t count(T value, size_t begin = 0, size_t end = npos) const;
Expand Down
12 changes: 10 additions & 2 deletions src/realm/array_basic_tpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,8 +286,16 @@ size_t BasicArray<T>::find(T value, size_t begin, size_t end) const
return i == data + end ? not_found : size_t(i - data);
}

template <class T>
inline size_t BasicArray<T>::find_first(T value, size_t begin, size_t end) const
// Equal-condition search for float arrays: forwards straight to find().
template <>
template <>
inline size_t BasicArray<float>::find_first<Equal>(float value, size_t begin, size_t end) const
{
    return find(value, begin, end);
}

// Equal-condition search for double arrays: forwards straight to find().
template <>
template <>
inline size_t BasicArray<double>::find_first<Equal>(double value, size_t begin, size_t end) const
{
    return find(value, begin, end);
}
Expand Down
6 changes: 6 additions & 0 deletions src/realm/array_integer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class ArrayIntNull : public Array {


size_t find_first(value_type value, size_t begin = 0, size_t end = npos) const;
size_t find_first_ge(value_type value, size_t begin = 0, size_t end = npos) const;


// Overwrite Array::bptree_leaf_insert to correctly split nodes.
Expand Down Expand Up @@ -624,6 +625,11 @@ inline size_t ArrayIntNull::find_first(value_type value, size_t begin, size_t en
{
return find_first<Equal>(value, begin, end);
}

// Named shorthand for find_first<GreaterEqual> over the range [begin, end).
inline size_t ArrayIntNull::find_first_ge(value_type value, size_t begin, size_t end) const
{
    return this->find_first<GreaterEqual>(value, begin, end);
}
}

#endif // REALM_ARRAY_INTEGER_HPP
6 changes: 4 additions & 2 deletions src/realm/bptree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ class BpTree : public BpTreeBase {
T front() const noexcept;
T back() const noexcept;

template <class Cond>
size_t find_first(T value, size_t begin = 0, size_t end = npos) const;
void find_all(IntegerColumn& out_indices, T value, size_t begin = 0, size_t end = npos) const;

Expand Down Expand Up @@ -1176,10 +1177,11 @@ void BpTree<T>::get_leaf(size_t ndx, size_t& ndx_in_leaf, LeafInfo& inout_leaf_i
}

template <class T>
template <class Cond>
size_t BpTree<T>::find_first(T value, size_t begin, size_t end) const
{
if (root_is_leaf()) {
return root_as_leaf().find_first(value, begin, end);
return root_as_leaf().template find_first<Cond>(value, begin, end);
}

// FIXME: It would be better to always require that 'end' is
Expand All @@ -1197,7 +1199,7 @@ size_t BpTree<T>::find_first(T value, size_t begin, size_t end) const
get_leaf(ndx_in_tree, ndx_in_leaf, leaf_info);
size_t leaf_offset = ndx_in_tree - ndx_in_leaf;
size_t end_in_leaf = std::min(leaf->size(), end - leaf_offset);
size_t ndx = leaf->find_first(value, ndx_in_leaf, end_in_leaf); // Throws (maybe)
size_t ndx = leaf->template find_first<Cond>(value, ndx_in_leaf, end_in_leaf); // Throws (maybe)
if (ndx != not_found)
return leaf_offset + ndx;
ndx_in_tree = leaf_offset + end_in_leaf;
Expand Down
2 changes: 1 addition & 1 deletion src/realm/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,7 @@ size_t Column<T>::find_first(T value, size_t begin, size_t end) const

if (m_search_index && begin == 0 && end == npos)
return m_search_index->find_first(value);
return m_tree.find_first(value, begin, end);
return m_tree.template find_first<Equal>(value, begin, end);
}

template <class T>
Expand Down
52 changes: 52 additions & 0 deletions src/realm/column_timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,4 +422,56 @@ Timestamp TimestampColumn::minimum(size_t* result_index) const
{
return minmax<Less>(result_index);
}

// Specialized search for the first row in [begin, end) whose timestamp is
// strictly greater than `value`. Returns npos when no such row is found.
template <>
size_t TimestampColumn::find<Greater>(Timestamp value, size_t begin, size_t end) const noexcept
{
    // The fast path below only handles non-nullable columns and non-null
    // search values; everything else goes through the generic scan.
    if (m_nullable || value.is_null()) {
        return find_slow<Greater>(value, begin, end);
    }

    const auto wanted_sec = value.get_seconds();
    const auto wanted_ns = value.get_nanoseconds();

    size_t cursor = begin;
    while (cursor < end) {
        // Let the seconds tree skip every row whose seconds part is too small.
        const auto candidate = m_seconds->find_first<GreaterEqual>(wanted_sec, cursor, end);
        if (candidate == npos) {
            break; // no remaining row has seconds >= wanted_sec
        }

        // The candidate has seconds >= wanted_sec, so it matches if its seconds
        // are strictly larger, or (seconds being equal) its nanoseconds are.
        if (*m_seconds->get(candidate) > wanted_sec || m_nanoseconds->get(candidate) > wanted_ns) {
            return candidate;
        }

        // Same seconds but nanoseconds not larger: resume after this row.
        cursor = candidate + 1;
    }

    return npos;
}

// Specialized search for the first row in [begin, end) whose timestamp is
// strictly less than `value`. Returns npos when no such row is found.
template <>
size_t TimestampColumn::find<Less>(Timestamp value, size_t begin, size_t end) const noexcept
{
    // The fast path below only handles non-nullable columns and non-null
    // search values; everything else goes through the generic scan.
    if (m_nullable || value.is_null()) {
        return find_slow<Less>(value, begin, end);
    }

    const auto wanted_sec = value.get_seconds();
    const auto wanted_ns = value.get_nanoseconds();

    size_t cursor = begin;
    while (cursor < end) {
        // Let the seconds tree skip every row whose seconds part is too large.
        const auto candidate = m_seconds->find_first<LessEqual>(wanted_sec, cursor, end);
        if (candidate == npos) {
            break; // no remaining row has seconds <= wanted_sec
        }

        // The candidate has seconds <= wanted_sec, so it matches if its seconds
        // are strictly smaller, or (seconds being equal) its nanoseconds are.
        if (*m_seconds->get(candidate) < wanted_sec || m_nanoseconds->get(candidate) < wanted_ns) {
            return candidate;
        }

        // Same seconds but nanoseconds not smaller: resume after this row.
        cursor = candidate + 1;
    }

    return npos;
}
}
11 changes: 10 additions & 1 deletion src/realm/column_timestamp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class TimestampColumn : public ColumnBaseSimple {
void erase(size_t row_ndx, bool is_last);

template <class Condition>
size_t find(Timestamp value, size_t begin, size_t end) const noexcept
size_t find_slow(Timestamp value, size_t begin, size_t end) const noexcept
{
// FIXME: Here we can do all sorts of clever optimizations. Use bithack-search on seconds, then for each match
// check nanoseconds, etc. Lots of possibilities. Below code is naive and slow but works.
Expand All @@ -110,6 +110,12 @@ class TimestampColumn : public ColumnBaseSimple {
return npos;
}

// Generic entry point for condition searches: hands the work to find_slow()
// with the same condition and range. Conditions that have an explicit
// specialization of find() bypass this body.
template <class Condition>
size_t find(Timestamp value, size_t begin, size_t end) const noexcept
{
    return this->find_slow<Condition>(value, begin, end);
}

typedef Timestamp value_type;

private:
Expand Down Expand Up @@ -150,6 +156,9 @@ class TimestampColumn : public ColumnBaseSimple {
}
};

// Explicit specializations of TimestampColumn::find() that are defined in
// column_timestamp.cpp. Each one has to be declared here so that every
// translation unit including this header sees it before its first use;
// otherwise that use would implicitly instantiate the generic template
// instead, which the language treats as ill-formed without requiring a
// diagnostic.
template <>
size_t TimestampColumn::find<Greater>(Timestamp value, size_t begin, size_t end) const noexcept;

template <>
size_t TimestampColumn::find<Less>(Timestamp value, size_t begin, size_t end) const noexcept;

} // namespace realm

#endif // REALM_COLUMN_TIMESTAMP_HPP
36 changes: 36 additions & 0 deletions test/test_shared.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4008,4 +4008,40 @@ TEST(Shared_GetCommitSize)
}
}

// Verifies ordering queries (>, >=, <, <=) on a Timestamp column. Each
// comparison is also run as a standalone query so that it is checked on its
// own rather than only as part of a compound condition.
TEST(Shared_TimestampQuery)
{
    SHARED_GROUP_TEST_PATH(path);
    SharedGroup sg(path);

    {
        WriteTransaction wt(sg);

        auto table = wt.get_or_add_table("table");
        auto col_date = table->add_column(type_Timestamp, "date");
        auto col_value = table->add_column(type_Int, "value");

        for (int i = 0; i < 10; i++) {
            auto ndx = table->add_empty_row();
            table->set_timestamp(col_date, ndx, Timestamp(i / 4, i % 4));
            table->set_int(col_value, ndx, i);
        }
        // Timestamps : {0,0}, {0,1}, {0,2}, {0,3}, {1,0}, {1,1}, {1,2}, {1,3}, {2,0}, {2,1}
        wt.commit();
    }

    Group& g = const_cast<Group&>(sg.begin_read());
    auto table = g.get_table("table");
    auto col_date = table->get_column_index("date");

    // Strictly greater than {0,3}: {1,0} .. {2,1}
    Query q = table->column<Timestamp>(col_date) > Timestamp(0, 3);
    auto cnt = q.count();
    CHECK_EQUAL(cnt, 6);

    // Greater than or equal to {0,3}: additionally includes {0,3} itself
    q = table->column<Timestamp>(col_date) >= Timestamp(0, 3);
    cnt = q.count();
    CHECK_EQUAL(cnt, 7);

    // Strictly less than {1,3}: {0,0} .. {1,2}
    q = table->column<Timestamp>(col_date) < Timestamp(1, 3);
    cnt = q.count();
    CHECK_EQUAL(cnt, 7);

    // Less than or equal to {1,3}: additionally includes {1,3} itself
    q = table->column<Timestamp>(col_date) <= Timestamp(1, 3);
    cnt = q.count();
    CHECK_EQUAL(cnt, 8);

    // Open interval ({0,3}, {1,3}): {1,0}, {1,1}, {1,2}
    q = table->column<Timestamp>(col_date) > Timestamp(0, 3) && table->column<Timestamp>(col_date) < Timestamp(1, 3);
    cnt = q.count();
    CHECK_EQUAL(cnt, 3);
}

#endif // TEST_SHARED

0 comments on commit 21ac6c4

Please sign in to comment.