From 21ac6c4fcac547d32dbd17079638816ebc082a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Mon, 12 Aug 2019 16:28:24 +0200 Subject: [PATCH 1/8] Optimize for 'Less' and 'Greater' query on Timestamp --- src/realm/array_basic.hpp | 5 ++++ src/realm/array_basic_tpl.hpp | 12 ++++++-- src/realm/array_integer.hpp | 6 ++++ src/realm/bptree.hpp | 6 ++-- src/realm/column.hpp | 2 +- src/realm/column_timestamp.cpp | 52 ++++++++++++++++++++++++++++++++++ src/realm/column_timestamp.hpp | 11 ++++++- test/test_shared.cpp | 36 +++++++++++++++++++++++ 8 files changed, 124 insertions(+), 6 deletions(-) diff --git a/src/realm/array_basic.hpp b/src/realm/array_basic.hpp index 6b9c212f473..7f650e11c32 100644 --- a/src/realm/array_basic.hpp +++ b/src/realm/array_basic.hpp @@ -47,7 +47,12 @@ class BasicArray : public Array { void truncate(size_t size); void clear(); + template size_t find_first(T value, size_t begin = 0, size_t end = npos) const; + size_t find_first(T value, size_t begin = 0, size_t end = npos) const + { + return find_first(value, begin, end); + } void find_all(IntegerColumn* result, T value, size_t add_offset = 0, size_t begin = 0, size_t end = npos) const; size_t count(T value, size_t begin = 0, size_t end = npos) const; diff --git a/src/realm/array_basic_tpl.hpp b/src/realm/array_basic_tpl.hpp index 5ce37fde1fa..e2b8b89228f 100644 --- a/src/realm/array_basic_tpl.hpp +++ b/src/realm/array_basic_tpl.hpp @@ -286,8 +286,16 @@ size_t BasicArray::find(T value, size_t begin, size_t end) const return i == data + end ? not_found : size_t(i - data); } -template -inline size_t BasicArray::find_first(T value, size_t begin, size_t end) const +template <> +template <> +inline size_t BasicArray::find_first(float value, size_t begin, size_t end) const +{ + return this->find(value, begin, end); +} + +template <> +template <> +inline size_t BasicArray::find_first(double value, size_t begin, size_t end) const { return this->find(value, begin, end); } diff --git a/src/realm/array_integer.hpp b/src/realm/array_integer.hpp index e460c5381de..cf535ffe2f6 100644 --- a/src/realm/array_integer.hpp +++ b/src/realm/array_integer.hpp @@ -162,6 +162,7 @@ class ArrayIntNull : public Array { size_t find_first(value_type value, size_t begin = 0, size_t end = npos) const; + size_t find_first_ge(value_type value, size_t begin = 0, size_t end = npos) const; // Overwrite Array::bptree_leaf_insert to correctly split nodes. @@ -624,6 +625,11 @@ inline size_t ArrayIntNull::find_first(value_type value, size_t begin, size_t en { return find_first(value, begin, end); } + +inline size_t ArrayIntNull::find_first_ge(value_type value, size_t begin, size_t end) const +{ + return find_first(value, begin, end); +} } #endif // REALM_ARRAY_INTEGER_HPP diff --git a/src/realm/bptree.hpp b/src/realm/bptree.hpp index aae64de8299..ae1a621e8d4 100644 --- a/src/realm/bptree.hpp +++ b/src/realm/bptree.hpp @@ -291,6 +291,7 @@ class BpTree : public BpTreeBase { T front() const noexcept; T back() const noexcept; + template size_t find_first(T value, size_t begin = 0, size_t end = npos) const; void find_all(IntegerColumn& out_indices, T value, size_t begin = 0, size_t end = npos) const; @@ -1176,10 +1177,11 @@ void BpTree::get_leaf(size_t ndx, size_t& ndx_in_leaf, LeafInfo& inout_leaf_i } template +template size_t BpTree::find_first(T value, size_t begin, size_t end) const { if (root_is_leaf()) { - return root_as_leaf().find_first(value, begin, end); + return root_as_leaf().template find_first(value, begin, end); } // FIXME: It would be better to always require that 'end' is @@ -1197,7 +1199,7 @@ size_t BpTree::find_first(T value, size_t begin, size_t end) const get_leaf(ndx_in_tree, ndx_in_leaf, leaf_info); size_t leaf_offset = ndx_in_tree - ndx_in_leaf; size_t end_in_leaf = std::min(leaf->size(), end - leaf_offset); - size_t ndx = leaf->find_first(value, ndx_in_leaf, end_in_leaf); // Throws (maybe) + size_t ndx = leaf->template find_first(value, ndx_in_leaf, end_in_leaf); // Throws (maybe) if (ndx != not_found) return leaf_offset + ndx; ndx_in_tree = leaf_offset + end_in_leaf; diff --git a/src/realm/column.hpp b/src/realm/column.hpp index 928fd93a7ce..e073a3cf99c 100644 --- a/src/realm/column.hpp +++ b/src/realm/column.hpp @@ -1053,7 +1053,7 @@ size_t Column::find_first(T value, size_t begin, size_t end) const if (m_search_index && begin == 0 && end == npos) return m_search_index->find_first(value); - return m_tree.find_first(value, begin, end); + return m_tree.template find_first(value, begin, end); } template diff --git a/src/realm/column_timestamp.cpp b/src/realm/column_timestamp.cpp index bfe11703739..b5fd8b6ec18 100644 --- a/src/realm/column_timestamp.cpp +++ b/src/realm/column_timestamp.cpp @@ -422,4 +422,56 @@ Timestamp TimestampColumn::minimum(size_t* result_index) const { return minmax(result_index); } + +template <> +size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept +{ + if (m_nullable || value.is_null()) { + return find_slow(value, begin, end); + } + + auto seconds = value.get_seconds(); + auto ns = value.get_nanoseconds(); + + while (begin < end) { + auto ndx = m_seconds->find_first(seconds, begin, end); + if (ndx != npos) { + if (*m_seconds->get(ndx) > seconds || m_nanoseconds->get(ndx) > ns) { + return ndx; + } + begin = ndx + 1; + } + else { + begin = end; + } + } + + return npos; +} + +template <> +size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept +{ + if (m_nullable || value.is_null()) { + return find_slow(value, begin, end); + } + + auto seconds = value.get_seconds(); + auto ns = value.get_nanoseconds(); + + while (begin < end) { + auto ndx = m_seconds->find_first(seconds, begin, end); + if (ndx != npos) { + if (*m_seconds->get(ndx) < seconds || m_nanoseconds->get(ndx) < ns) { + return ndx; + } + begin = ndx + 1; + } + else { + begin = end; + } + } + + return npos; +} } diff --git a/src/realm/column_timestamp.hpp b/src/realm/column_timestamp.hpp index d5c4859d76a..4da23eb63db 100644 --- a/src/realm/column_timestamp.hpp +++ b/src/realm/column_timestamp.hpp @@ -96,7 +96,7 @@ class TimestampColumn : public ColumnBaseSimple { void erase(size_t row_ndx, bool is_last); template - size_t find(Timestamp value, size_t begin, size_t end) const noexcept + size_t find_slow(Timestamp value, size_t begin, size_t end) const noexcept { // FIXME: Here we can do all sorts of clever optimizations. Use bithack-search on seconds, then for each match // check nanoseconds, etc. Lots of possibilities. Below code is naive and slow but works. @@ -110,6 +110,12 @@ class TimestampColumn : public ColumnBaseSimple { return npos; } + template + size_t find(Timestamp value, size_t begin, size_t end) const noexcept + { + return find_slow(value, begin, end); + } + typedef Timestamp value_type; private: @@ -150,6 +156,9 @@ class TimestampColumn : public ColumnBaseSimple { } }; +template <> +size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept; + } // namespace realm #endif // REALM_COLUMN_TIMESTAMP_HPP diff --git a/test/test_shared.cpp b/test/test_shared.cpp index e8211fff1dd..6b133576e7a 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -4008,4 +4008,40 @@ TEST(Shared_GetCommitSize) } } +TEST(Shared_TimestampQuery) +{ + SHARED_GROUP_TEST_PATH(path); + SharedGroup sg(path); + + { + WriteTransaction wt(sg); + + auto table = wt.get_or_add_table("table"); + auto col_date = table->add_column(type_Timestamp, "date"); + auto col_value = table->add_column(type_Int, "value"); + + for (int i = 0; i < 10; i++) { + auto ndx = table->add_empty_row(); + table->set_timestamp(col_date, ndx, Timestamp(i / 4, i % 4)); + table->set_int(col_value, ndx, i); + } + // Timestamps : {0,0}, {0,1}, {0,2}, {0,3}, {1,0}, {1,1}, {1,2}, {1,3}, {2,0}, {2,1} + wt.commit(); + } + + Group& g = const_cast(sg.begin_read()); + auto table = g.get_table("table"); + auto col_date = table->get_column_index("date"); + + Query q = table->column(col_date) > Timestamp(0, 3); + auto cnt = q.count(); + CHECK_EQUAL(cnt, 6); + q = table->column(col_date) >= Timestamp(0, 3); + cnt = q.count(); + CHECK_EQUAL(cnt, 7); + q = table->column(col_date) > Timestamp(0, 3) && table->column(col_date) < Timestamp(1, 3); + cnt = q.count(); + CHECK_EQUAL(cnt, 3); +} + #endif // TEST_SHARED From 6fe9397b6cf11c897e0cd225d96f0d6bc05f4d4c Mon Sep 17 00:00:00 2001 From: James Stone Date: Mon, 12 Aug 2019 13:23:58 -0700 Subject: [PATCH 2/8] Timestamp query optimisation WIP --- src/realm/column_timestamp.cpp | 10 +++ src/realm/column_timestamp.hpp | 2 + src/realm/query_engine.hpp | 114 ++++++++++++++++++++++++++- test/benchmark-common-tasks/main.cpp | 111 ++++++++++++++++++-------- test/test_parser.cpp | 23 +++++- test/test_query.cpp | 14 +++- 6 files changed, 236 insertions(+), 38 deletions(-) diff --git a/src/realm/column_timestamp.cpp b/src/realm/column_timestamp.cpp index b5fd8b6ec18..e7d18c56224 100644 --- a/src/realm/column_timestamp.cpp +++ b/src/realm/column_timestamp.cpp @@ -354,6 +354,16 @@ void TimestampColumn::leaf_to_dot(MemRef, ArrayParent*, size_t /*ndx_in_parent*/ // FIXME: Dummy implementation } +void TimestampColumn::get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree>::LeafInfo& inout_leaf_info) const noexcept +{ + m_seconds->get_leaf(ndx, ndx_in_leaf, inout_leaf_info); +} + +void TimestampColumn::get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree::LeafInfo& inout_leaf) const noexcept +{ + m_nanoseconds->get_leaf(ndx, ndx_in_leaf, inout_leaf); +} + // LCOV_EXCL_STOP ignore debug functions void TimestampColumn::add(const Timestamp& ts) diff --git a/src/realm/column_timestamp.hpp b/src/realm/column_timestamp.hpp index 4da23eb63db..62ff488d85c 100644 --- a/src/realm/column_timestamp.hpp +++ b/src/realm/column_timestamp.hpp @@ -83,6 +83,8 @@ class TimestampColumn : public ColumnBaseSimple { void to_dot(std::ostream&, StringData title = StringData()) const override; void do_dump_node_structure(std::ostream&, int level) const override; void leaf_to_dot(MemRef, ArrayParent*, size_t ndx_in_parent, std::ostream&) const override; + void get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree>::LeafInfo& inout_leaf) const noexcept; + void get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree::LeafInfo& inout_leaf) const noexcept; void add(const Timestamp& ts = Timestamp{}); Timestamp get(size_t row_ndx) const noexcept; diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 12d1d161adc..d8e591b6b9b 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -1288,6 +1288,11 @@ class TimestampNode : public ParentNode { public: using TConditionValue = Timestamp; static const bool special_null_node = false; + using LeafTypeSeconds = typename IntNullColumn::LeafType; + using LeafInfoSeconds = typename IntNullColumn::LeafInfo; + using LeafTypeNanos = typename IntegerColumn::LeafType; + using LeafInfoNanos = typename IntegerColumn::LeafInfo; + TimestampNode(Timestamp v, size_t column) : m_value(v) @@ -1315,14 +1320,101 @@ class TimestampNode : public ParentNode { ParentNode::init(); m_dD = 100.0; + + // Clear leaf cache + m_leaf_end_seconds = 0; + m_array_ptr_seconds.reset(); // Explicitly destroy the old one first, because we're reusing the memory. + m_array_ptr_seconds.reset(new (&m_leaf_cache_storage_seconds) LeafTypeSeconds(m_table->get_alloc())); + m_leaf_end_nanos = 0; + m_array_ptr_nanos.reset(); // Explicitly destroy the old one first, because we're reusing the memory. + m_array_ptr_nanos.reset(new (&m_leaf_cache_storage_nanos) LeafTypeNanos(m_table->get_alloc())); + + } + + void get_leaf_seconds(const TimestampColumn& col, size_t ndx) + { + size_t ndx_in_leaf; + LeafInfoSeconds leaf_info_seconds{&m_leaf_ptr_seconds, m_array_ptr_seconds.get()}; + col.get_seconds_leaf(ndx, ndx_in_leaf, leaf_info_seconds); + m_leaf_start_seconds = ndx - ndx_in_leaf; + m_leaf_end_seconds = m_leaf_start_seconds + m_leaf_ptr_seconds->size(); + } + + void get_leaf_nanos(const TimestampColumn& col, size_t ndx) + { + size_t ndx_in_leaf; + LeafInfoNanos leaf_info_nanos{&m_leaf_ptr_nanos, m_array_ptr_nanos.get()}; + col.get_nanoseconds_leaf(ndx, ndx_in_leaf, leaf_info_nanos); + m_leaf_start_nanos = ndx - ndx_in_leaf; + m_leaf_end_nanos = m_leaf_start_nanos + m_leaf_ptr_nanos->size(); } size_t find_first_local(size_t start, size_t end) override { - size_t ret = m_condition_column->find(m_value, start, end); - return ret; + REALM_ASSERT(this->m_table); + + if (this->m_value.is_null()) { + if (TConditionFunction::condition == cond_Greater || TConditionFunction::condition == cond_Less) { + return not_found; + } + } + + while (start < end) { + + // Cache internal leaves + if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) { + this->get_leaf_seconds(*this->m_condition_column, start); + } + + size_t end2; + if (end > this->m_leaf_end_seconds) + end2 = this->m_leaf_end_seconds - this->m_leaf_start_seconds; + else + end2 = end - this->m_leaf_start_seconds; + + size_t s; + int64_t needle = this->m_value.is_null() ? this->m_leaf_ptr_seconds->null_value() : this->m_value.get_seconds(); + s = this->m_leaf_ptr_seconds->template find_first(needle, start - this->m_leaf_start_seconds, end2); + + if (s == not_found) { + start = this->m_leaf_end_seconds; + continue; + } + else { + size_t ndx_in_col = s + this->m_leaf_start_seconds; + if (true || TConditionFunction::condition == cond_NotEqual) { // FIXME: specialise this with a template + // we might have passed some that match in seconds but not in nanoseconds +// for (size_t i = start; i < ndx_in_col; ++i) { +// Timestamp ts = m_condition_column->get(i); +// if (condition(ts, m_value, ts.is_null(), m_value.is_null())) { +// return i; +// } +// } + + + + return ndx_in_col; + } + Timestamp ts = m_condition_column->get(ndx_in_col); + if (condition(ts, m_value, ts.is_null(), m_value.is_null())) { + return ndx_in_col; + } + else { + ++start; + } + } + } + + return not_found; } + +// size_t find_first_local(size_t start, size_t end) override +// { +// size_t ret = m_condition_column->find(m_value, start, end); +// return ret; +// } + virtual std::string describe(util::serializer::SerialisationState& state) const override { REALM_ASSERT(m_condition_column != nullptr); @@ -1347,6 +1439,24 @@ class TimestampNode : public ParentNode { private: Timestamp m_value; const TimestampColumn* m_condition_column; + TConditionFunction condition; + + // Leaf cache seconds + using LeafCacheStorageSeconds = typename std::aligned_storage::type; + LeafCacheStorageSeconds m_leaf_cache_storage_seconds; + std::unique_ptr m_array_ptr_seconds; + const LeafTypeSeconds* m_leaf_ptr_seconds = nullptr; + size_t m_leaf_start_seconds = npos; + size_t m_leaf_end_seconds = 0; + size_t m_local_end_seconds; + // Leaf cache nanoseconds + using LeafCacheStorageNanos = typename std::aligned_storage::type; + LeafCacheStorageNanos m_leaf_cache_storage_nanos; + std::unique_ptr m_array_ptr_nanos; + const LeafTypeNanos* m_leaf_ptr_nanos = nullptr; + size_t m_leaf_start_nanos = npos; + size_t m_leaf_end_nanos = 0; + size_t m_local_end_nanos; }; class StringNodeBase : public ParentNode { diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index 2bfb1916471..2804e5d0634 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -17,6 +17,7 @@ **************************************************************************/ #include +#include #include #include @@ -40,7 +41,7 @@ using namespace realm::util; using namespace realm::test_util; namespace { -#define BASE_SIZE 3600 +#define BASE_SIZE 36000 /** This bechmark suite represents a number of common use cases, @@ -320,6 +321,49 @@ struct BenchmarkWithLongStrings : BenchmarkWithStrings { } }; +struct BenchmarkWithTimestamps : Benchmark { + std::multiset values; + void before_all(SharedGroup& group) + { + WriteTransaction tr(group); + TableRef t = tr.add_table("Timestamps"); + t->add_column(type_Timestamp, "timestamps"); + t->add_empty_row(BASE_SIZE * 10); + Random r; + for (size_t i = 0; i < BASE_SIZE * 10; ++i) { + Timestamp time{r.draw_int(0, 1000000), r.draw_int(0, 1000000)}; + t->set_timestamp(0, i, time); + values.insert(time); + } + tr.commit(); + } + + void after_all(SharedGroup& group) + { + Group& g = group.begin_write(); + g.remove_table("Timestamps"); + group.commit(); + } +}; + +struct BenchmarkQueryTimestampGreater : BenchmarkWithTimestamps { + const char* name() const + { + return "QueryTimestampGreater"; + } + + void operator()(SharedGroup& group) + { + ReadTransaction tr(group); + ConstTableRef table = tr.get_table("Timestamps"); + Query query = table->where().greater(0, *(values.begin())); + TableView results = query.find_all(); + REALM_ASSERT_EX(results.size() == values.size() - 1, results.size(), values.size() / 2, + values.size()); + static_cast(results); + } +}; + struct BenchmarkWithIntsTable : Benchmark { void before_all(SharedGroup& group) { @@ -1111,38 +1155,39 @@ int benchmark_common_tasks_main() #define BENCH(B) run_benchmark(results) - BENCH(BenchmarkUnorderedTableViewClear); - BENCH(BenchmarkEmptyCommit); - BENCH(AddTable); - BENCH(BenchmarkQuery); - BENCH(BenchmarkQueryNot); - BENCH(BenchmarkSize); - BENCH(BenchmarkSort); - BENCH(BenchmarkSortInt); - BENCH(BenchmarkDistinctIntFewDupes); - BENCH(BenchmarkDistinctIntManyDupes); - BENCH(BenchmarkDistinctStringFewDupes); - BENCH(BenchmarkDistinctStringManyDupes); - BENCH(BenchmarkFindAllStringFewDupes); - BENCH(BenchmarkFindAllStringManyDupes); - BENCH(BenchmarkFindFirstStringFewDupes); - BENCH(BenchmarkFindFirstStringManyDupes); - BENCH(BenchmarkInsert); - BENCH(BenchmarkGetString); - BENCH(BenchmarkSetString); - BENCH(BenchmarkCreateIndex); - BENCH(BenchmarkGetLongString); - BENCH(BenchmarkQueryLongString); - BENCH(BenchmarkSetLongString); - BENCH(BenchmarkGetLinkList); - BENCH(BenchmarkQueryInsensitiveString); - BENCH(BenchmarkQueryInsensitiveStringIndexed); - BENCH(BenchmarkNonInitatorOpen); - BENCH(BenchmarkQueryChainedOrStrings); - BENCH(BenchmarkQueryChainedOrInts); - BENCH(BenchmarkQueryChainedOrIntsIndexed); - BENCH(BenchmarkQueryIntEquality); - BENCH(BenchmarkQueryIntEqualityIndexed); +// BENCH(BenchmarkUnorderedTableViewClear); +// BENCH(BenchmarkEmptyCommit); +// BENCH(AddTable); +// BENCH(BenchmarkQuery); +// BENCH(BenchmarkQueryNot); +// BENCH(BenchmarkSize); +// BENCH(BenchmarkSort); +// BENCH(BenchmarkSortInt); +// BENCH(BenchmarkDistinctIntFewDupes); +// BENCH(BenchmarkDistinctIntManyDupes); +// BENCH(BenchmarkDistinctStringFewDupes); +// BENCH(BenchmarkDistinctStringManyDupes); +// BENCH(BenchmarkFindAllStringFewDupes); +// BENCH(BenchmarkFindAllStringManyDupes); +// BENCH(BenchmarkFindFirstStringFewDupes); +// BENCH(BenchmarkFindFirstStringManyDupes); +// BENCH(BenchmarkInsert); +// BENCH(BenchmarkGetString); +// BENCH(BenchmarkSetString); +// BENCH(BenchmarkCreateIndex); +// BENCH(BenchmarkGetLongString); +// BENCH(BenchmarkQueryLongString); +// BENCH(BenchmarkSetLongString); +// BENCH(BenchmarkGetLinkList); +// BENCH(BenchmarkQueryInsensitiveString); +// BENCH(BenchmarkQueryInsensitiveStringIndexed); +// BENCH(BenchmarkNonInitatorOpen); +// BENCH(BenchmarkQueryChainedOrStrings); +// BENCH(BenchmarkQueryChainedOrInts); +// BENCH(BenchmarkQueryChainedOrIntsIndexed); +// BENCH(BenchmarkQueryIntEquality); +// BENCH(BenchmarkQueryIntEqualityIndexed); + BENCH(BenchmarkQueryTimestampGreater); #undef BENCH return 0; diff --git a/test/test_parser.cpp b/test/test_parser.cpp index d0fca85b380..3b5702824a5 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -384,7 +384,6 @@ Query verify_query(test_util::unit_test::TestContext& test_context, TableRef t, parser::ParserResult res2 = realm::parser::parse(description); realm::query_builder::apply_predicate(q2, res2.predicate, args); - CHECK_EQUAL(q2.count(), num_results); return q2; } @@ -454,9 +453,11 @@ TEST(Parser_basic_serialisation) TableRef t = g.add_table(table_name); size_t int_col_ndx = t->add_column(type_Int, "age"); size_t str_col_ndx = t->add_column(type_String, "name"); - size_t double_col_ndx = t->add_column(type_Double, "fees"); + size_t double_col_ndx = t->add_column(type_Double, "fees", true); + size_t bool_col_ndx = t->add_column(type_Bool, "licensed", true); size_t link_col_ndx = t->add_column_link(type_Link, "buddy", *t); size_t time_col_ndx = t->add_column(type_Timestamp, "time", true); + t->add_search_index(int_col_ndx); t->add_empty_row(5); std::vector names = {"Billy", "Bob", "Joe", "Jane", "Joel"}; std::vector fees = { 2.0, 2.23, 2.22, 2.25, 3.73 }; @@ -465,6 +466,7 @@ TEST(Parser_basic_serialisation) t->set_int(int_col_ndx, i, i); t->set_string(str_col_ndx, i, names[i]); t->set_double(double_col_ndx, i, fees[i]); + t->set_bool(bool_col_ndx, i, i % 2 == 0); } t->set_timestamp(time_col_ndx, 0, Timestamp(realm::null())); t->set_timestamp(time_col_ndx, 1, Timestamp(1512130073, 0)); // 2017/12/02 @ 12:47am (UTC) @@ -493,8 +495,24 @@ TEST(Parser_basic_serialisation) verify_query(test_context, t, "3 =< age", 2); verify_query(test_context, t, "age > 2 and age < 4", 1); verify_query(test_context, t, "age = 1 || age == 3", 2); + verify_query(test_context, t, "fees = 1.2 || fees = 2.23", 1); + verify_query(test_context, t, "fees = 2 || fees = 3", 1); + verify_query(test_context, t, "fees = 2 || fees = 3 || fees = 4", 1); + verify_query(test_context, t, "fees = 0 || fees = 1", 0); + verify_query(test_context, t, "fees != 2.22 && fees > 2.2", 3); verify_query(test_context, t, "(age > 1 || fees >= 2.25) && age == 4", 1); + verify_query(test_context, t, "licensed == true", 3); + verify_query(test_context, t, "licensed == false", 2); + verify_query(test_context, t, "licensed = true || licensed = true", 3); + verify_query(test_context, t, "licensed = 1 || licensed = 0", 5); + verify_query(test_context, t, "licensed = true || licensed = false", 5); + verify_query(test_context, t, "licensed == true || licensed == false", 5); + verify_query(test_context, t, "licensed == true || buddy.licensed == true", 3); + verify_query(test_context, t, "buddy.licensed == true", 0); + verify_query(test_context, t, "buddy.licensed == false", 1); + verify_query(test_context, t, "licensed == false || buddy.licensed == false", 3); + verify_query(test_context, t, "licensed == true or licensed = true || licensed = TRUE", 3); verify_query(test_context, t, "name = \"Joe\"", 1); verify_query(test_context, t, "buddy.age > 0", 1); verify_query(test_context, t, "name BEGINSWITH \"J\"", 3); @@ -521,6 +539,7 @@ TEST(Parser_basic_serialisation) CHECK(message.find("missing_property") != std::string::npos); } + TEST(Parser_LinksToSameTable) { Group g; diff --git a/test/test_query.cpp b/test/test_query.cpp index 45092e8b9a8..1027f6fdb9f 100644 --- a/test/test_query.cpp +++ b/test/test_query.cpp @@ -10557,7 +10557,7 @@ TEST(Query_MoveDoesntDoubleDelete) } } -TEST(Query_Timestamp) +ONLY(Query_Timestamp) { size_t match; Table table; @@ -10609,6 +10609,18 @@ TEST(Query_Timestamp) match = (first != null{}).count(); CHECK_EQUAL(match, 5); + match = (first > null{}).count(); + CHECK_EQUAL(match, 0); + + match = (first < null{}).count(); + CHECK_EQUAL(match, 0); + + match = (first >= null{}).count(); + CHECK_EQUAL(match, 1); + + match = (first <= null{}).count(); + CHECK_EQUAL(match, 1); + match = (first != Timestamp(0, 0)).count(); CHECK_EQUAL(match, 5); From 2835d09512ba90085e9ac773db86689b3dc6d02d Mon Sep 17 00:00:00 2001 From: James Stone Date: Mon, 12 Aug 2019 17:29:25 -0700 Subject: [PATCH 3/8] Timestamp greater optimisation --- src/realm/query_engine.cpp | 41 ++++++++++++ src/realm/query_engine.hpp | 97 +++++++++++++++++----------- test/benchmark-common-tasks/main.cpp | 88 ++++++++++++++++++++++++- 3 files changed, 184 insertions(+), 42 deletions(-) diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index af16336c1dc..818f97084f7 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -684,3 +684,44 @@ ExpressionNode::ExpressionNode(const ExpressionNode& from, QueryNodeHandoverPatc , m_expression(from.m_expression->clone(patches)) { } + + +template<> +size_t TimestampNode::find_first_local(size_t start, size_t end) +{ + REALM_ASSERT(this->m_table); + + if (this->m_value.is_null()) { + return not_found; + } + Greater cond; + + while (start < end) { + size_t ret = this->find_first_local_seconds(start, end); + + if (ret == not_found) + return not_found; + + util::Optional seconds = get_seconds_and_cache(ret); + if (!seconds) { + start = ret + 1; + continue; + } + if (have_nanoseconds_leaf_and_all_are_zeros(ret)) { + if (*seconds > m_value.get_seconds()) { + return ret; + } else { + start = ret + 1; + continue; + } + } + int32_t nanos = this->get_nanoseconds_and_cache(ret); + Timestamp ts{*seconds, nanos}; + if (cond(ts, m_value, ts.is_null(), m_value.is_null())) { + return ret; + } + start = ret + 1; + } + + return not_found; +} diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index d8e591b6b9b..9670f1f1b71 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -1349,18 +1349,9 @@ class TimestampNode : public ParentNode { m_leaf_end_nanos = m_leaf_start_nanos + m_leaf_ptr_nanos->size(); } - size_t find_first_local(size_t start, size_t end) override - { - REALM_ASSERT(this->m_table); - - if (this->m_value.is_null()) { - if (TConditionFunction::condition == cond_Greater || TConditionFunction::condition == cond_Less) { - return not_found; - } - } - + template + size_t find_first_local_seconds(size_t start, size_t end) { while (start < end) { - // Cache internal leaves if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) { this->get_leaf_seconds(*this->m_condition_column, start); @@ -1372,48 +1363,76 @@ class TimestampNode : public ParentNode { else end2 = end - this->m_leaf_start_seconds; - size_t s; - int64_t needle = this->m_value.is_null() ? this->m_leaf_ptr_seconds->null_value() : this->m_value.get_seconds(); - s = this->m_leaf_ptr_seconds->template find_first(needle, start - this->m_leaf_start_seconds, end2); + int64_t needle = this->m_value.is_null() ? this->m_leaf_ptr_seconds->null_value() : this->m_value.get_seconds(); // FIXME: test null + size_t s = this->m_leaf_ptr_seconds->template find_first(needle, start - this->m_leaf_start_seconds, end2); if (s == not_found) { start = this->m_leaf_end_seconds; continue; } - else { - size_t ndx_in_col = s + this->m_leaf_start_seconds; - if (true || TConditionFunction::condition == cond_NotEqual) { // FIXME: specialise this with a template - // we might have passed some that match in seconds but not in nanoseconds -// for (size_t i = start; i < ndx_in_col; ++i) { -// Timestamp ts = m_condition_column->get(i); -// if (condition(ts, m_value, ts.is_null(), m_value.is_null())) { -// return i; -// } -// } + return s + this->m_leaf_start_seconds; + } + return not_found; + } + util::Optional get_seconds_and_cache(size_t ndx) { + // Cache internal leaves + if (ndx >= this->m_leaf_end_seconds || ndx < this->m_leaf_start_seconds) { + this->get_leaf_seconds(*this->m_condition_column, ndx); + } + return this->m_leaf_ptr_seconds->get(ndx - this->m_leaf_start_seconds); + } - return ndx_in_col; - } - Timestamp ts = m_condition_column->get(ndx_in_col); - if (condition(ts, m_value, ts.is_null(), m_value.is_null())) { - return ndx_in_col; - } - else { - ++start; - } - } + bool have_nanoseconds_leaf_and_all_are_zeros(size_t ndx) { + if (ndx >= this->m_leaf_start_nanos && ndx < this->m_leaf_end_nanos) { + return this->m_leaf_ptr_nanos->get_width() == 0; } + return false; + } + + int32_t get_nanoseconds_and_cache(size_t ndx) { + // Cache internal leaves + if (ndx >= this->m_leaf_end_nanos || ndx < this->m_leaf_start_nanos) { + this->get_leaf_nanos(*this->m_condition_column, ndx); + } + return int32_t(this->m_leaf_ptr_nanos->get(ndx - this->m_leaf_start_nanos)); + } + + template + size_t find_first_local_nanoseconds(size_t start, size_t end) { + while (start < end) { + // Cache internal leaves + if (start >= this->m_leaf_end_nanos || start < this->m_leaf_start_nanos) { + this->get_leaf_nanos(*this->m_condition_column, start); + } + + size_t end2; + if (end > this->m_leaf_end_nanos) + end2 = this->m_leaf_end_nanos - this->m_leaf_start_nanos; + else + end2 = end - this->m_leaf_start_nanos; + + int32_t needle = this->m_value.get_nanoseconds(); // FIXME: test null + size_t s = this->m_leaf_ptr_nanos->template find_first(needle, start - this->m_leaf_start_nanos, end2); + if (s == not_found) { + start = this->m_leaf_end_nanos; + continue; + } + return s + this->m_leaf_start_nanos; + } return not_found; } + // see query_engine.cpp for operator specialisations + size_t find_first_local(size_t start, size_t end) override + { + REALM_ASSERT(this->m_table); -// size_t find_first_local(size_t start, size_t end) override -// { -// size_t ret = m_condition_column->find(m_value, start, end); -// return ret; -// } + size_t ret = m_condition_column->find(m_value, start, end); + return ret; + } virtual std::string describe(util::serializer::SerialisationState& state) const override { diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index 2804e5d0634..83ed254e0cc 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -323,6 +323,9 @@ struct BenchmarkWithLongStrings : BenchmarkWithStrings { struct BenchmarkWithTimestamps : Benchmark { std::multiset values; + Timestamp needle; + size_t num_results_to_needle; + double percent_results_to_needle = 0.5; void before_all(SharedGroup& group) { WriteTransaction tr(group); @@ -336,6 +339,14 @@ struct BenchmarkWithTimestamps : Benchmark { values.insert(time); } tr.commit(); + // simulate a work load where this percent of random results match + num_results_to_needle = values.size() * percent_results_to_needle; + // this relies on values being stored in sorted order by std::multiset + auto it = values.begin(); + for (size_t i = 0; i < num_results_to_needle; ++i) { + ++it; + } + needle = *it; } void after_all(SharedGroup& group) @@ -347,6 +358,10 @@ struct BenchmarkWithTimestamps : Benchmark { }; struct BenchmarkQueryTimestampGreater : BenchmarkWithTimestamps { + void before_all(SharedGroup& group) { + percent_results_to_needle = 2.0f / 3.0f; + BenchmarkWithTimestamps::before_all(group); + } const char* name() const { return "QueryTimestampGreater"; @@ -356,10 +371,73 @@ struct BenchmarkQueryTimestampGreater : BenchmarkWithTimestamps { { ReadTransaction tr(group); ConstTableRef table = tr.get_table("Timestamps"); - Query query = table->where().greater(0, *(values.begin())); + Query query = table->where().greater(0, needle); + TableView results = query.find_all(); + REALM_ASSERT_EX(results.size() == values.size() - num_results_to_needle - 1, results.size(), num_results_to_needle, values.size()); + static_cast(results); + } +}; + +struct BenchmarkQueryTimestampGreaterEqual : BenchmarkWithTimestamps { + void before_all(SharedGroup& group) { + percent_results_to_needle = 2.0f / 3.0f; + BenchmarkWithTimestamps::before_all(group); + } + const char* name() const + { + return "QueryTimestampGreaterEqual"; + } + + void operator()(SharedGroup& group) + { + ReadTransaction tr(group); + ConstTableRef table = tr.get_table("Timestamps"); + Query query = table->where().greater_equal(0, needle); + TableView results = query.find_all(); + REALM_ASSERT_EX(results.size() == values.size() - num_results_to_needle, results.size(), num_results_to_needle, values.size()); + static_cast(results); + } +}; + + +struct BenchmarkQueryTimestampLess : BenchmarkWithTimestamps { + void before_all(SharedGroup& group) { + percent_results_to_needle = 1.0f / 3.0f; + BenchmarkWithTimestamps::before_all(group); + } + const char* name() const + { + return "QueryTimestampLess"; + } + + void operator()(SharedGroup& group) + { + ReadTransaction tr(group); + ConstTableRef table = tr.get_table("Timestamps"); + Query query = table->where().less(0, needle); TableView results = query.find_all(); - REALM_ASSERT_EX(results.size() == values.size() - 1, results.size(), values.size() / 2, - values.size()); + REALM_ASSERT_EX(results.size() == num_results_to_needle, results.size(), num_results_to_needle, values.size()); + static_cast(results); + } +}; + +struct BenchmarkQueryTimestampLessEqual : BenchmarkWithTimestamps { + void before_all(SharedGroup& group) { + percent_results_to_needle = 1.0f / 3.0f; + BenchmarkWithTimestamps::before_all(group); + } + const char* name() const + { + return "QueryTimestampLessEqual"; + } + + void operator()(SharedGroup& group) + { + ReadTransaction tr(group); + ConstTableRef table = tr.get_table("Timestamps"); + Query query = table->where().less_equal(0, needle); + TableView results = query.find_all(); + REALM_ASSERT_EX(results.size() == num_results_to_needle + 1, results.size(), num_results_to_needle, values.size()); static_cast(results); } }; @@ -1187,7 +1265,11 @@ int benchmark_common_tasks_main() // BENCH(BenchmarkQueryChainedOrIntsIndexed); // BENCH(BenchmarkQueryIntEquality); // BENCH(BenchmarkQueryIntEqualityIndexed); + BENCH(BenchmarkQueryTimestampGreater); + BENCH(BenchmarkQueryTimestampGreaterEqual); + BENCH(BenchmarkQueryTimestampLess); + BENCH(BenchmarkQueryTimestampLessEqual); #undef BENCH return 0; From fc722cfd4a71bf89a96efa3b7afb846fac40ef29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Tue, 13 Aug 2019 07:44:39 +0200 Subject: [PATCH 4/8] Fix compile error --- src/realm/query_engine.cpp | 3 ++- src/realm/query_engine.hpp | 1 - test/benchmark-common-tasks/main.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index 818f97084f7..9ff57f30ce6 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -685,7 +685,7 @@ ExpressionNode::ExpressionNode(const ExpressionNode& from, QueryNodeHandoverPatc { } - +namespace realm { template<> size_t TimestampNode::find_first_local(size_t start, size_t end) { @@ -725,3 +725,4 @@ size_t TimestampNode::find_first_local(size_t start, size_t end) return not_found; } +} // namespace realm diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 9670f1f1b71..099e4b81d9b 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -2377,7 +2377,6 @@ class TwoColumnsNode : public ParentNode { // For Next-Generation expressions like col1 / col2 + 123 > col4 * 100. class ExpressionNode : public ParentNode { - public: ExpressionNode(std::unique_ptr); diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index 83ed254e0cc..f406a83e25d 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -340,7 +340,7 @@ struct BenchmarkWithTimestamps : Benchmark { } tr.commit(); // simulate a work load where this percent of random results match - num_results_to_needle = values.size() * percent_results_to_needle; + num_results_to_needle = size_t(values.size() * percent_results_to_needle); // this relies on values being stored in sorted order by std::multiset auto it = values.begin(); for (size_t i = 0; i < num_results_to_needle; ++i) { From 2c0ebc18abbdd9d5405e045851a5f6edb96df670 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Tue, 13 Aug 2019 09:16:24 +0200 Subject: [PATCH 5/8] Enhance test --- test/test_query.cpp | 2 +- test/test_shared.cpp | 65 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/test/test_query.cpp b/test/test_query.cpp index 1027f6fdb9f..7df0d5b4169 100644 --- a/test/test_query.cpp +++ b/test/test_query.cpp @@ -10557,7 +10557,7 @@ TEST(Query_MoveDoesntDoubleDelete) } } -ONLY(Query_Timestamp) +TEST(Query_Timestamp) { size_t match; Table table; diff --git a/test/test_shared.cpp b/test/test_shared.cpp index 6b133576e7a..c93410242a2 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -4029,19 +4029,58 @@ TEST(Shared_TimestampQuery) wt.commit(); } - Group& g = const_cast(sg.begin_read()); - auto table = g.get_table("table"); - auto col_date = table->get_column_index("date"); - - Query q = table->column(col_date) > Timestamp(0, 3); - auto cnt = q.count(); - CHECK_EQUAL(cnt, 6); - q = table->column(col_date) >= Timestamp(0, 3); - cnt = q.count(); - CHECK_EQUAL(cnt, 7); - q = table->column(col_date) > Timestamp(0, 3) && table->column(col_date) < Timestamp(1, 3); - cnt = q.count(); - CHECK_EQUAL(cnt, 3); + { + Group& g = const_cast(sg.begin_read()); + auto table = g.get_table("table"); + auto col_date = table->get_column_index("date"); + + Query q = table->column(col_date) > Timestamp(0, 3); + auto cnt = q.count(); + CHECK_EQUAL(cnt, 6); + q = table->column(col_date) >= Timestamp(0, 3); + cnt = q.count(); + CHECK_EQUAL(cnt, 7); + q = table->column(col_date) > Timestamp(0, 3) && + table->column(col_date) < Timestamp(1, 3); + cnt = q.count(); + CHECK_EQUAL(cnt, 3); + sg.end_read(); + } + + { + WriteTransaction wt(sg); + + auto table = wt.get_table("table"); + auto col_date = table->get_column_index("date"); + auto col_value = table->get_column_index("value"); + + table->clear(); + Random random(random_int()); // Seed from slow global generator + + for (int i = 0; i < 100000; i++) { + auto ndx = table->add_empty_row(); + int seconds = random.draw_int_max(3600 * 24 * 10); + table->set_timestamp(col_date, ndx, Timestamp(seconds, i)); + table->set_int(col_value, ndx, i); + } + wt.commit(); + } + + { + Group& g = const_cast(sg.begin_read()); + auto table = g.get_table("table"); + auto col_date = table->get_column_index("date"); + + Query q = table->column(col_date) > Timestamp(3600 * 24 * 5, 3); + auto start = std::chrono::steady_clock::now(); + auto cnt = q.count(); + auto end = std::chrono::steady_clock::now(); + + std::cout << "Time: " << std::chrono::duration_cast(end - start).count() << " us" + << std::endl; + CHECK_GREATER(cnt, 50000); + sg.end_read(); + } } #endif // TEST_SHARED From 93ea913a84c40bee8f42fc95194be8f2256c56a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Tue, 13 Aug 2019 09:56:16 +0200 Subject: [PATCH 6/8] Revert "Optimize for 'Less' and 'Greater' query on Timestamp" This reverts commit 21ac6c4fcac547d32dbd17079638816ebc082a61. --- src/realm/array_basic.hpp | 5 ---- src/realm/array_basic_tpl.hpp | 12 ++------ src/realm/array_integer.hpp | 6 ---- src/realm/bptree.hpp | 6 ++-- src/realm/column.hpp | 2 +- src/realm/column_timestamp.cpp | 52 ---------------------------------- src/realm/column_timestamp.hpp | 11 +------ test/test_shared.cpp | 5 ++-- 8 files changed, 9 insertions(+), 90 deletions(-) diff --git a/src/realm/array_basic.hpp b/src/realm/array_basic.hpp index 7f650e11c32..6b9c212f473 100644 --- a/src/realm/array_basic.hpp +++ b/src/realm/array_basic.hpp @@ -47,12 +47,7 @@ class BasicArray : public Array { void truncate(size_t size); void clear(); - template size_t find_first(T value, size_t begin = 0, size_t end = npos) const; - size_t find_first(T value, size_t begin = 0, size_t end = npos) const - { - return find_first(value, begin, end); - } void find_all(IntegerColumn* result, T value, size_t add_offset = 0, size_t begin = 0, size_t end = npos) const; size_t count(T value, size_t begin = 0, size_t end = npos) const; diff --git a/src/realm/array_basic_tpl.hpp b/src/realm/array_basic_tpl.hpp index e2b8b89228f..5ce37fde1fa 100644 --- a/src/realm/array_basic_tpl.hpp +++ b/src/realm/array_basic_tpl.hpp @@ -286,16 +286,8 @@ size_t BasicArray::find(T value, size_t begin, size_t end) const return i == data + end ? not_found : size_t(i - data); } -template <> -template <> -inline size_t BasicArray::find_first(float value, size_t begin, size_t end) const -{ - return this->find(value, begin, end); -} - -template <> -template <> -inline size_t BasicArray::find_first(double value, size_t begin, size_t end) const +template +inline size_t BasicArray::find_first(T value, size_t begin, size_t end) const { return this->find(value, begin, end); } diff --git a/src/realm/array_integer.hpp b/src/realm/array_integer.hpp index cf535ffe2f6..e460c5381de 100644 --- a/src/realm/array_integer.hpp +++ b/src/realm/array_integer.hpp @@ -162,7 +162,6 @@ class ArrayIntNull : public Array { size_t find_first(value_type value, size_t begin = 0, size_t end = npos) const; - size_t find_first_ge(value_type value, size_t begin = 0, size_t end = npos) const; // Overwrite Array::bptree_leaf_insert to correctly split nodes. @@ -625,11 +624,6 @@ inline size_t ArrayIntNull::find_first(value_type value, size_t begin, size_t en { return find_first(value, begin, end); } - -inline size_t ArrayIntNull::find_first_ge(value_type value, size_t begin, size_t end) const -{ - return find_first(value, begin, end); -} } #endif // REALM_ARRAY_INTEGER_HPP diff --git a/src/realm/bptree.hpp b/src/realm/bptree.hpp index ae1a621e8d4..aae64de8299 100644 --- a/src/realm/bptree.hpp +++ b/src/realm/bptree.hpp @@ -291,7 +291,6 @@ class BpTree : public BpTreeBase { T front() const noexcept; T back() const noexcept; - template size_t find_first(T value, size_t begin = 0, size_t end = npos) const; void find_all(IntegerColumn& out_indices, T value, size_t begin = 0, size_t end = npos) const; @@ -1177,11 +1176,10 @@ void BpTree::get_leaf(size_t ndx, size_t& ndx_in_leaf, LeafInfo& inout_leaf_i } template -template size_t BpTree::find_first(T value, size_t begin, size_t end) const { if (root_is_leaf()) { - return root_as_leaf().template find_first(value, begin, end); + return root_as_leaf().find_first(value, begin, end); } // FIXME: It would be better to always require that 'end' is @@ -1199,7 +1197,7 @@ size_t BpTree::find_first(T value, size_t begin, size_t end) const get_leaf(ndx_in_tree, ndx_in_leaf, leaf_info); size_t leaf_offset = ndx_in_tree - ndx_in_leaf; size_t end_in_leaf = std::min(leaf->size(), end - leaf_offset); - size_t ndx = leaf->template find_first(value, ndx_in_leaf, end_in_leaf); // Throws (maybe) + size_t ndx = leaf->find_first(value, ndx_in_leaf, end_in_leaf); // Throws (maybe) if (ndx != not_found) return leaf_offset + ndx; ndx_in_tree = leaf_offset + end_in_leaf; diff --git a/src/realm/column.hpp b/src/realm/column.hpp index e073a3cf99c..928fd93a7ce 100644 --- a/src/realm/column.hpp +++ b/src/realm/column.hpp @@ -1053,7 +1053,7 @@ size_t Column::find_first(T value, size_t begin, size_t end) const if (m_search_index && begin == 0 && end == npos) return m_search_index->find_first(value); - return m_tree.template find_first(value, begin, end); + return m_tree.find_first(value, begin, end); } template diff --git a/src/realm/column_timestamp.cpp b/src/realm/column_timestamp.cpp index e7d18c56224..38b3e77f3b2 100644 --- a/src/realm/column_timestamp.cpp +++ b/src/realm/column_timestamp.cpp @@ -432,56 +432,4 @@ Timestamp TimestampColumn::minimum(size_t* result_index) const { return minmax(result_index); } - -template <> -size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept -{ - if (m_nullable || value.is_null()) { - return find_slow(value, begin, end); - } - - auto seconds = value.get_seconds(); - auto ns = value.get_nanoseconds(); - - while (begin < end) { - auto ndx = m_seconds->find_first(seconds, begin, end); - if (ndx != npos) { - if (*m_seconds->get(ndx) > seconds || m_nanoseconds->get(ndx) > ns) { - return ndx; - } - begin = ndx + 1; - } - else { - begin = end; - } - } - - return npos; -} - -template <> -size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept -{ - if (m_nullable || value.is_null()) { - return find_slow(value, begin, end); - } - - auto seconds = value.get_seconds(); - auto ns = value.get_nanoseconds(); - - while (begin < end) { - auto ndx = m_seconds->find_first(seconds, begin, end); - if (ndx != npos) { - if (*m_seconds->get(ndx) < seconds || m_nanoseconds->get(ndx) < ns) { - return ndx; - } - begin = ndx + 1; - } - else { - begin = end; - } - } - - return npos; -} } diff --git a/src/realm/column_timestamp.hpp b/src/realm/column_timestamp.hpp index 62ff488d85c..0734011c5f7 100644 --- a/src/realm/column_timestamp.hpp +++ b/src/realm/column_timestamp.hpp @@ -98,7 +98,7 @@ class TimestampColumn : public ColumnBaseSimple { void erase(size_t row_ndx, bool is_last); template - size_t find_slow(Timestamp value, size_t begin, size_t end) const noexcept + size_t find(Timestamp value, size_t begin, size_t end) const noexcept { // FIXME: Here we can do all sorts of clever optimizations. Use bithack-search on seconds, then for each match // check nanoseconds, etc. Lots of possibilities. Below code is naive and slow but works. @@ -112,12 +112,6 @@ class TimestampColumn : public ColumnBaseSimple { return npos; } - template - size_t find(Timestamp value, size_t begin, size_t end) const noexcept - { - return find_slow(value, begin, end); - } - typedef Timestamp value_type; private: @@ -158,9 +152,6 @@ class TimestampColumn : public ColumnBaseSimple { } }; -template <> -size_t TimestampColumn::find(Timestamp value, size_t begin, size_t end) const noexcept; - } // namespace realm #endif // REALM_COLUMN_TIMESTAMP_HPP diff --git a/test/test_shared.cpp b/test/test_shared.cpp index c93410242a2..04f69f6333d 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -4007,7 +4007,7 @@ TEST(Shared_GetCommitSize) CHECK_LESS(size_after - size_before, commit_size); } } - +/* TEST(Shared_TimestampQuery) { SHARED_GROUP_TEST_PATH(path); @@ -4060,7 +4060,7 @@ TEST(Shared_TimestampQuery) for (int i = 0; i < 100000; i++) { auto ndx = table->add_empty_row(); int seconds = random.draw_int_max(3600 * 24 * 10); - table->set_timestamp(col_date, ndx, Timestamp(seconds, i)); + table->set_timestamp(col_date, ndx, Timestamp(seconds, 0)); table->set_int(col_value, ndx, i); } wt.commit(); @@ -4082,5 +4082,6 @@ TEST(Shared_TimestampQuery) sg.end_read(); } } +*/ #endif // TEST_SHARED From 98b289e140262e88913b7c9b1994e1a101414259 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Tue, 13 Aug 2019 11:30:57 +0200 Subject: [PATCH 7/8] A bit of cleanup and refactoring --- src/realm/column_timestamp.cpp | 6 +- src/realm/column_timestamp.hpp | 3 +- src/realm/query_engine.cpp | 51 +++++++++--- src/realm/query_engine.hpp | 146 ++++++++++++++------------------- 4 files changed, 105 insertions(+), 101 deletions(-) diff --git a/src/realm/column_timestamp.cpp b/src/realm/column_timestamp.cpp index 38b3e77f3b2..701978834fe 100644 --- a/src/realm/column_timestamp.cpp +++ b/src/realm/column_timestamp.cpp @@ -354,12 +354,14 @@ void TimestampColumn::leaf_to_dot(MemRef, ArrayParent*, size_t /*ndx_in_parent*/ // FIXME: Dummy implementation } -void TimestampColumn::get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree>::LeafInfo& inout_leaf_info) const noexcept +void TimestampColumn::get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, + BpTree>::LeafInfo& inout_leaf_info) const noexcept { m_seconds->get_leaf(ndx, ndx_in_leaf, inout_leaf_info); } -void TimestampColumn::get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree::LeafInfo& inout_leaf) const noexcept +void TimestampColumn::get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, + BpTree::LeafInfo& inout_leaf) const noexcept { m_nanoseconds->get_leaf(ndx, ndx_in_leaf, inout_leaf); } diff --git a/src/realm/column_timestamp.hpp b/src/realm/column_timestamp.hpp index 0734011c5f7..cffaa90de0f 100644 --- a/src/realm/column_timestamp.hpp +++ b/src/realm/column_timestamp.hpp @@ -83,7 +83,8 @@ class TimestampColumn : public ColumnBaseSimple { void to_dot(std::ostream&, StringData title = StringData()) const override; void do_dump_node_structure(std::ostream&, int level) const override; void leaf_to_dot(MemRef, ArrayParent*, size_t ndx_in_parent, std::ostream&) const override; - void get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree>::LeafInfo& inout_leaf) const noexcept; + void get_seconds_leaf(size_t ndx, size_t& ndx_in_leaf, + BpTree>::LeafInfo& inout_leaf) const noexcept; void get_nanoseconds_leaf(size_t ndx, size_t& ndx_in_leaf, BpTree::LeafInfo& inout_leaf) const noexcept; void add(const Timestamp& ts = Timestamp{}); diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index 9ff57f30ce6..967b5ddacd8 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -686,7 +686,7 @@ ExpressionNode::ExpressionNode(const ExpressionNode& from, QueryNodeHandoverPatc } namespace realm { -template<> +template <> size_t TimestampNode::find_first_local(size_t start, size_t end) { REALM_ASSERT(this->m_table); @@ -694,8 +694,6 @@ size_t TimestampNode::find_first_local(size_t start, size_t end) if (this->m_value.is_null()) { return not_found; } - Greater cond; - while (start < end) { size_t ret = this->find_first_local_seconds(start, end); @@ -707,17 +705,46 @@ size_t TimestampNode::find_first_local(size_t start, size_t end) start = ret + 1; continue; } - if (have_nanoseconds_leaf_and_all_are_zeros(ret)) { - if (*seconds > m_value.get_seconds()) { - return ret; - } else { - start = ret + 1; - continue; - } + if (*seconds > m_value.get_seconds()) { + return ret; + } + // We now know that neither m_value nor current value is null and that seconds part equals + // We are just missing to compare nanoseconds part + int32_t nanos = this->get_nanoseconds_and_cache(ret); + if (nanos > m_value.get_nanoseconds()) { + return ret; + } + start = ret + 1; + } + + return not_found; +} +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end) +{ + REALM_ASSERT(this->m_table); + + if (this->m_value.is_null()) { + return not_found; + } + while (start < end) { + size_t ret = this->find_first_local_seconds(start, end); + + if (ret == not_found) + return not_found; + + util::Optional seconds = get_seconds_and_cache(ret); + if (!seconds) { + start = ret + 1; + continue; + } + if (*seconds < m_value.get_seconds()) { + return ret; } + // We now know that neither m_value nor current value is null and that seconds part equals + // We are just missing to compare nanoseconds part int32_t nanos = this->get_nanoseconds_and_cache(ret); - Timestamp ts{*seconds, nanos}; - if (cond(ts, m_value, ts.is_null(), m_value.is_null())) { + if (nanos < m_value.get_nanoseconds()) { return ret; } start = ret + 1; diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 099e4b81d9b..e78be60b849 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -1283,8 +1283,7 @@ class BinaryNode : public ParentNode { }; -template -class TimestampNode : public ParentNode { +class TimestampNodeBase : public ParentNode { public: using TConditionValue = Timestamp; static const bool special_null_node = false; @@ -1294,14 +1293,14 @@ class TimestampNode : public ParentNode { using LeafInfoNanos = typename IntegerColumn::LeafInfo; - TimestampNode(Timestamp v, size_t column) + TimestampNodeBase(Timestamp v, size_t column) : m_value(v) { m_condition_column_idx = column; } - TimestampNode(null, size_t column) - : TimestampNode(Timestamp{}, column) + TimestampNodeBase(null, size_t column) + : TimestampNodeBase(Timestamp{}, column) { } @@ -1328,9 +1327,9 @@ class TimestampNode : public ParentNode { m_leaf_end_nanos = 0; m_array_ptr_nanos.reset(); // Explicitly destroy the old one first, because we're reusing the memory. m_array_ptr_nanos.reset(new (&m_leaf_cache_storage_nanos) LeafTypeNanos(m_table->get_alloc())); - } +protected: void get_leaf_seconds(const TimestampColumn& col, size_t ndx) { size_t ndx_in_leaf; @@ -1349,33 +1348,8 @@ class TimestampNode : public ParentNode { m_leaf_end_nanos = m_leaf_start_nanos + m_leaf_ptr_nanos->size(); } - template - size_t find_first_local_seconds(size_t start, size_t end) { - while (start < end) { - // Cache internal leaves - if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) { - this->get_leaf_seconds(*this->m_condition_column, start); - } - - size_t end2; - if (end > this->m_leaf_end_seconds) - end2 = this->m_leaf_end_seconds - this->m_leaf_start_seconds; - else - end2 = end - this->m_leaf_start_seconds; - - int64_t needle = this->m_value.is_null() ? this->m_leaf_ptr_seconds->null_value() : this->m_value.get_seconds(); // FIXME: test null - size_t s = this->m_leaf_ptr_seconds->template find_first(needle, start - this->m_leaf_start_seconds, end2); - - if (s == not_found) { - start = this->m_leaf_end_seconds; - continue; - } - return s + this->m_leaf_start_seconds; - } - return not_found; - } - - util::Optional get_seconds_and_cache(size_t ndx) { + util::Optional get_seconds_and_cache(size_t ndx) + { // Cache internal leaves if (ndx >= this->m_leaf_end_seconds || ndx < this->m_leaf_start_seconds) { this->get_leaf_seconds(*this->m_condition_column, ndx); @@ -1383,15 +1357,8 @@ class TimestampNode : public ParentNode { return this->m_leaf_ptr_seconds->get(ndx - this->m_leaf_start_seconds); } - - bool have_nanoseconds_leaf_and_all_are_zeros(size_t ndx) { - if (ndx >= this->m_leaf_start_nanos && ndx < this->m_leaf_end_nanos) { - return this->m_leaf_ptr_nanos->get_width() == 0; - } - return false; - } - - int32_t get_nanoseconds_and_cache(size_t ndx) { + int32_t get_nanoseconds_and_cache(size_t ndx) + { // Cache internal leaves if (ndx >= this->m_leaf_end_nanos || ndx < this->m_leaf_start_nanos) { this->get_leaf_nanos(*this->m_condition_column, ndx); @@ -1399,28 +1366,66 @@ class TimestampNode : public ParentNode { return int32_t(this->m_leaf_ptr_nanos->get(ndx - this->m_leaf_start_nanos)); } - template - size_t find_first_local_nanoseconds(size_t start, size_t end) { + TimestampNodeBase(const TimestampNodeBase& from, QueryNodeHandoverPatches* patches) + : ParentNode(from, patches) + , m_value(from.m_value) + , m_condition_column(from.m_condition_column) + { + if (m_condition_column && patches) + m_condition_column_idx = m_condition_column->get_column_index(); + } + + Timestamp m_value; + const TimestampColumn* m_condition_column; + + // Leaf cache seconds + using LeafCacheStorageSeconds = + typename std::aligned_storage::type; + LeafCacheStorageSeconds m_leaf_cache_storage_seconds; + std::unique_ptr m_array_ptr_seconds; + const LeafTypeSeconds* m_leaf_ptr_seconds = nullptr; + size_t m_leaf_start_seconds = npos; + size_t m_leaf_end_seconds = 0; + + // Leaf cache nanoseconds + using LeafCacheStorageNanos = typename std::aligned_storage::type; + LeafCacheStorageNanos m_leaf_cache_storage_nanos; + std::unique_ptr m_array_ptr_nanos; + const LeafTypeNanos* m_leaf_ptr_nanos = nullptr; + size_t m_leaf_start_nanos = npos; + size_t m_leaf_end_nanos = 0; +}; + +template +class TimestampNode : public TimestampNodeBase { +public: + using TimestampNodeBase::TimestampNodeBase; + + template + size_t find_first_local_seconds(size_t start, size_t end) + { + REALM_ASSERT(!this->m_value.is_null()); while (start < end) { // Cache internal leaves - if (start >= this->m_leaf_end_nanos || start < this->m_leaf_start_nanos) { - this->get_leaf_nanos(*this->m_condition_column, start); + if (start >= this->m_leaf_end_seconds || start < this->m_leaf_start_seconds) { + this->get_leaf_seconds(*this->m_condition_column, start); } size_t end2; - if (end > this->m_leaf_end_nanos) - end2 = this->m_leaf_end_nanos - this->m_leaf_start_nanos; + if (end > this->m_leaf_end_seconds) + end2 = this->m_leaf_end_seconds - this->m_leaf_start_seconds; else - end2 = end - this->m_leaf_start_nanos; + end2 = end - this->m_leaf_start_seconds; - int32_t needle = this->m_value.get_nanoseconds(); // FIXME: test null - size_t s = this->m_leaf_ptr_nanos->template find_first(needle, start - this->m_leaf_start_nanos, end2); + int64_t needle = this->m_value.get_seconds(); + size_t s = this->m_leaf_ptr_seconds->template find_first( + needle, start - this->m_leaf_start_seconds, end2); if (s == not_found) { - start = this->m_leaf_end_nanos; + start = this->m_leaf_end_seconds; continue; } - return s + this->m_leaf_start_nanos; + return s + this->m_leaf_start_seconds; } return not_found; } @@ -1437,45 +1442,14 @@ class TimestampNode : public ParentNode { virtual std::string describe(util::serializer::SerialisationState& state) const override { REALM_ASSERT(m_condition_column != nullptr); - return state.describe_column(ParentNode::m_table, m_condition_column->get_column_index()) - + " " + TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value); + return state.describe_column(ParentNode::m_table, m_condition_column->get_column_index()) + " " + + TConditionFunction::description() + " " + util::serializer::print_value(TimestampNode::m_value); } std::unique_ptr clone(QueryNodeHandoverPatches* patches) const override { return std::unique_ptr(new TimestampNode(*this, patches)); } - - TimestampNode(const TimestampNode& from, QueryNodeHandoverPatches* patches) - : ParentNode(from, patches) - , m_value(from.m_value) - , m_condition_column(from.m_condition_column) - { - if (m_condition_column && patches) - m_condition_column_idx = m_condition_column->get_column_index(); - } - -private: - Timestamp m_value; - const TimestampColumn* m_condition_column; - TConditionFunction condition; - - // Leaf cache seconds - using LeafCacheStorageSeconds = typename std::aligned_storage::type; - LeafCacheStorageSeconds m_leaf_cache_storage_seconds; - std::unique_ptr m_array_ptr_seconds; - const LeafTypeSeconds* m_leaf_ptr_seconds = nullptr; - size_t m_leaf_start_seconds = npos; - size_t m_leaf_end_seconds = 0; - size_t m_local_end_seconds; - // Leaf cache nanoseconds - using LeafCacheStorageNanos = typename std::aligned_storage::type; - LeafCacheStorageNanos m_leaf_cache_storage_nanos; - std::unique_ptr m_array_ptr_nanos; - const LeafTypeNanos* m_leaf_ptr_nanos = nullptr; - size_t m_leaf_start_nanos = npos; - size_t m_leaf_end_nanos = 0; - size_t m_local_end_nanos; }; class StringNodeBase : public ParentNode { From 2e709350000205115297167ca5d32d1cbb4168bc Mon Sep 17 00:00:00 2001 From: James Stone Date: Tue, 13 Aug 2019 18:05:51 -0700 Subject: [PATCH 8/8] Add GreaterEqual and LessEqual specializations We take the position that you cannot query a timestamp column using <, >, <= and >= against a null value. --- CHANGELOG.md | 1 + src/realm/query_engine.cpp | 76 +++++++++++++++++++++++++ src/realm/query_engine.hpp | 11 ++++ test/benchmark-common-tasks/main.cpp | 64 ++++++++++----------- test/test_query.cpp | 22 ++++++++ test/test_shared.cpp | 84 ++++++---------------------- 6 files changed, 160 insertions(+), 98 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b0564b2468..7aed0b4111f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ ### Internals * The release binaries for Apple platforms are now built with Xcode 9.4 (up from 9.2). +* Performance of queries on Timestamp is improved ---------------------------------------------- diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index 967b5ddacd8..0ab90fcc110 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -719,6 +719,7 @@ size_t TimestampNode::find_first_local(size_t start, size_t end) return not_found; } + template <> size_t TimestampNode::find_first_local(size_t start, size_t end) { @@ -752,4 +753,79 @@ size_t TimestampNode::find_first_local(size_t start, size_t end) return not_found; } + +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end) +{ + REALM_ASSERT(this->m_table); + + if (this->m_value.is_null()) { + return not_found; + } + while (start < end) { + size_t ret = this->find_first_local_seconds(start, end); + + if (ret == not_found) + return not_found; + + util::Optional seconds = get_seconds_and_cache(ret); + if (!seconds) { // null equality + start = ret + 1; + continue; + } + if (*seconds > m_value.get_seconds()) { + return ret; + } + // We now know that neither m_value nor current value is null and that seconds part equals + // We are just missing to compare nanoseconds part + int32_t nanos = this->get_nanoseconds_and_cache(ret); + if (nanos >= m_value.get_nanoseconds()) { + return ret; + } + start = ret + 1; + } + + return not_found; +} + +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end) +{ + REALM_ASSERT(this->m_table); + + if (this->m_value.is_null()) { + return not_found; + } + while (start < end) { + size_t ret = this->find_first_local_seconds(start, end); + + if (ret == not_found) + return not_found; + + util::Optional seconds = get_seconds_and_cache(ret); + if (!seconds) { // null equality + start = ret + 1; + continue; + } + if (*seconds < m_value.get_seconds()) { + return ret; + } + // We now know that neither m_value nor current value is null and that seconds part equals + // We are just missing to compare nanoseconds part + int32_t nanos = this->get_nanoseconds_and_cache(ret); + if (nanos <= m_value.get_nanoseconds()) { + return ret; + } + start = ret + 1; + } + + return not_found; +} +#ifdef _WIN32 +// Explicit instantiation required on some windows builds +template size_t TimestampNode::find_first_local(size_t start, size_t end); +template size_t TimestampNode::find_first_local(size_t start, size_t end); +template size_t TimestampNode::find_first_local(size_t start, size_t end); +template size_t TimestampNode::find_first_local(size_t start, size_t end); +#endif } // namespace realm diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index e78be60b849..7f87be1bf3c 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -1452,6 +1452,17 @@ class TimestampNode : public TimestampNodeBase { } }; +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end); +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end); +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end); +template <> +size_t TimestampNode::find_first_local(size_t start, size_t end); + + + class StringNodeBase : public ParentNode { public: using TConditionValue = StringData; diff --git a/test/benchmark-common-tasks/main.cpp b/test/benchmark-common-tasks/main.cpp index f406a83e25d..601dbed1bf0 100644 --- a/test/benchmark-common-tasks/main.cpp +++ b/test/benchmark-common-tasks/main.cpp @@ -1233,38 +1233,38 @@ int benchmark_common_tasks_main() #define BENCH(B) run_benchmark(results) -// BENCH(BenchmarkUnorderedTableViewClear); -// BENCH(BenchmarkEmptyCommit); -// BENCH(AddTable); -// BENCH(BenchmarkQuery); -// BENCH(BenchmarkQueryNot); -// BENCH(BenchmarkSize); -// BENCH(BenchmarkSort); -// BENCH(BenchmarkSortInt); -// BENCH(BenchmarkDistinctIntFewDupes); -// BENCH(BenchmarkDistinctIntManyDupes); -// BENCH(BenchmarkDistinctStringFewDupes); -// BENCH(BenchmarkDistinctStringManyDupes); -// BENCH(BenchmarkFindAllStringFewDupes); -// BENCH(BenchmarkFindAllStringManyDupes); -// BENCH(BenchmarkFindFirstStringFewDupes); -// BENCH(BenchmarkFindFirstStringManyDupes); -// BENCH(BenchmarkInsert); -// BENCH(BenchmarkGetString); -// BENCH(BenchmarkSetString); -// BENCH(BenchmarkCreateIndex); -// BENCH(BenchmarkGetLongString); -// BENCH(BenchmarkQueryLongString); -// BENCH(BenchmarkSetLongString); -// BENCH(BenchmarkGetLinkList); -// BENCH(BenchmarkQueryInsensitiveString); -// BENCH(BenchmarkQueryInsensitiveStringIndexed); -// BENCH(BenchmarkNonInitatorOpen); -// BENCH(BenchmarkQueryChainedOrStrings); -// BENCH(BenchmarkQueryChainedOrInts); -// BENCH(BenchmarkQueryChainedOrIntsIndexed); -// BENCH(BenchmarkQueryIntEquality); -// BENCH(BenchmarkQueryIntEqualityIndexed); + BENCH(BenchmarkUnorderedTableViewClear); + BENCH(BenchmarkEmptyCommit); + BENCH(AddTable); + BENCH(BenchmarkQuery); + BENCH(BenchmarkQueryNot); + BENCH(BenchmarkSize); + BENCH(BenchmarkSort); + BENCH(BenchmarkSortInt); + BENCH(BenchmarkDistinctIntFewDupes); + BENCH(BenchmarkDistinctIntManyDupes); + BENCH(BenchmarkDistinctStringFewDupes); + BENCH(BenchmarkDistinctStringManyDupes); + BENCH(BenchmarkFindAllStringFewDupes); + BENCH(BenchmarkFindAllStringManyDupes); + BENCH(BenchmarkFindFirstStringFewDupes); + BENCH(BenchmarkFindFirstStringManyDupes); + BENCH(BenchmarkInsert); + BENCH(BenchmarkGetString); + BENCH(BenchmarkSetString); + BENCH(BenchmarkCreateIndex); + BENCH(BenchmarkGetLongString); + BENCH(BenchmarkQueryLongString); + BENCH(BenchmarkSetLongString); + BENCH(BenchmarkGetLinkList); + BENCH(BenchmarkQueryInsensitiveString); + BENCH(BenchmarkQueryInsensitiveStringIndexed); + BENCH(BenchmarkNonInitatorOpen); + BENCH(BenchmarkQueryChainedOrStrings); + BENCH(BenchmarkQueryChainedOrInts); + BENCH(BenchmarkQueryChainedOrIntsIndexed); + BENCH(BenchmarkQueryIntEquality); + BENCH(BenchmarkQueryIntEqualityIndexed); BENCH(BenchmarkQueryTimestampGreater); BENCH(BenchmarkQueryTimestampGreaterEqual); diff --git a/test/test_query.cpp b/test/test_query.cpp index 7df0d5b4169..ca005f2c28f 100644 --- a/test/test_query.cpp +++ b/test/test_query.cpp @@ -10685,6 +10685,28 @@ TEST(Query_Timestamp) CHECK_EQUAL(match, npos); // Note that (null < null) == false } +TEST(Query_TimestampCount) +{ + Table table; + auto col_date = table.add_column(type_Timestamp, "date", true); + for (int i = 0; i < 10; i++) { + auto ndx = table.add_empty_row(); + table.set_timestamp(col_date, ndx, Timestamp(i / 4, i % 4)); + } + table.set_null(col_date, 5); + + // Timestamps : {0,0}, {0,1}, {0,2}, {0,3}, {1,0}, {}, {1,2}, {1,3}, {2,0}, {2,1} + + auto timestamps = table.column(col_date); + + CHECK_EQUAL((timestamps > Timestamp(0, 3)).count(), 5); + CHECK_EQUAL((timestamps >= Timestamp(0, 3)).count(), 6); + CHECK_EQUAL((timestamps < Timestamp(1, 3)).count(), 6); + CHECK_EQUAL((timestamps <= Timestamp(1, 3)).count(), 7); + CHECK_EQUAL((timestamps == Timestamp()).count(), 1); + CHECK_EQUAL((timestamps != Timestamp()).count(), 9); +} + TEST(Query_Timestamp_Null) { // Test that querying for null on non-nullable column (with default value being non-null value) is diff --git a/test/test_shared.cpp b/test/test_shared.cpp index 04f69f6333d..9fe630f2450 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -4007,80 +4007,32 @@ TEST(Shared_GetCommitSize) CHECK_LESS(size_after - size_before, commit_size); } } + /* +#include TEST(Shared_TimestampQuery) { - SHARED_GROUP_TEST_PATH(path); - SharedGroup sg(path); - - { - WriteTransaction wt(sg); - - auto table = wt.get_or_add_table("table"); - auto col_date = table->add_column(type_Timestamp, "date"); - auto col_value = table->add_column(type_Int, "value"); + Table table; + auto col_date = table.add_column(type_Timestamp, "date", true); - for (int i = 0; i < 10; i++) { - auto ndx = table->add_empty_row(); - table->set_timestamp(col_date, ndx, Timestamp(i / 4, i % 4)); - table->set_int(col_value, ndx, i); - } - // Timestamps : {0,0}, {0,1}, {0,2}, {0,3}, {1,0}, {1,1}, {1,2}, {1,3}, {2,0}, {2,1} - wt.commit(); - } - - { - Group& g = const_cast(sg.begin_read()); - auto table = g.get_table("table"); - auto col_date = table->get_column_index("date"); - - Query q = table->column(col_date) > Timestamp(0, 3); - auto cnt = q.count(); - CHECK_EQUAL(cnt, 6); - q = table->column(col_date) >= Timestamp(0, 3); - cnt = q.count(); - CHECK_EQUAL(cnt, 7); - q = table->column(col_date) > Timestamp(0, 3) && - table->column(col_date) < Timestamp(1, 3); - cnt = q.count(); - CHECK_EQUAL(cnt, 3); - sg.end_read(); - } - - { - WriteTransaction wt(sg); - - auto table = wt.get_table("table"); - auto col_date = table->get_column_index("date"); - auto col_value = table->get_column_index("value"); - - table->clear(); - Random random(random_int()); // Seed from slow global generator + Random random(random_int()); // Seed from slow global generator - for (int i = 0; i < 100000; i++) { - auto ndx = table->add_empty_row(); - int seconds = random.draw_int_max(3600 * 24 * 10); - table->set_timestamp(col_date, ndx, Timestamp(seconds, 0)); - table->set_int(col_value, ndx, i); - } - wt.commit(); + for (int i = 0; i < 10000; i++) { + auto ndx = table.add_empty_row(); + int seconds = random.draw_int_max(3600 * 24 * 10); + table.set_timestamp(col_date, ndx, Timestamp(seconds, 0)); } - { - Group& g = const_cast(sg.begin_read()); - auto table = g.get_table("table"); - auto col_date = table->get_column_index("date"); + Query q = table.column(col_date) > Timestamp(3600 * 24 * 5, 3); + auto start = std::chrono::steady_clock::now(); + CALLGRIND_START_INSTRUMENTATION; + auto cnt = q.count(); + CALLGRIND_STOP_INSTRUMENTATION; + auto end = std::chrono::steady_clock::now(); - Query q = table->column(col_date) > Timestamp(3600 * 24 * 5, 3); - auto start = std::chrono::steady_clock::now(); - auto cnt = q.count(); - auto end = std::chrono::steady_clock::now(); - - std::cout << "Time: " << std::chrono::duration_cast(end - start).count() << " us" - << std::endl; - CHECK_GREATER(cnt, 50000); - sg.end_read(); - } + std::cout << "Time: " << std::chrono::duration_cast(end - start).count() << " us" + << std::endl; + CHECK_GREATER(cnt, 50000); } */