Add SecondaryIndex::NewIterator so applications can query a secondary index.

* secondary_index.h: new pure virtual NewIterator(read_options, underlying_it)
  returning std::unique_ptr<Iterator>, plus the includes it needs.
* faiss_ivf_index.{h,cc}: override that is still a TODO and returns an error
  iterator carrying Status::NotSupported().
* transaction_test.cc: a test-local FooIterator that seeks the underlying
  iterator to the secondary key prefix, strips that prefix from key(), and
  rejects SeekToFirst/SeekToLast/SeekForPrev with NotSupported; the
  SecondaryIndex test queries the index before and after the updates.

diff --git a/include/rocksdb/utilities/secondary_index.h b/include/rocksdb/utilities/secondary_index.h
index c6734cfc8722..735f005ddb2b 100644
--- a/include/rocksdb/utilities/secondary_index.h
+++ b/include/rocksdb/utilities/secondary_index.h
@@ -6,10 +6,13 @@
 
 #pragma once
 
+#include <memory>
 #include <optional>
 #include <string>
 #include <variant>
 
+#include "rocksdb/iterator.h"
+#include "rocksdb/options.h"
 #include "rocksdb/rocksdb_namespace.h"
 #include "rocksdb/slice.h"
 #include "rocksdb/status.h"
@@ -96,6 +99,31 @@ class SecondaryIndex {
       const Slice& primary_column_value, const Slice& previous_column_value,
       std::optional<std::variant<Slice, std::string>>* secondary_value)
       const = 0;
+
+  // Create an iterator that can be used by applications to query the index.
+  // This method takes a ReadOptions structure, which can be used by
+  // applications to provide (implementation-specific) query parameters to the
+  // index, and an underlying iterator, which should be an iterator over the
+  // index's secondary column family, and is expected to be leveraged by the
+  // returned iterator to read the actual secondary index entries. (Providing
+  // the underlying iterator this way enables querying the index as of a
+  // specific point in time for example.)
+  //
+  // Querying the index can be performed by calling the returned iterator's
+  // Seek API with a search target, and then using Next (and potentially
+  // Prev) to iterate through the matching index entries. SeekToFirst,
+  // SeekToLast, and SeekForPrev are not expected to be supported by the
+  // iterator. The iterator should expose primary keys, that is, the secondary
+  // key prefix should be stripped from the index entries.
+  //
+  // The exact semantics of the returned iterator depend on the index and are
+  // implementation-specific. For simple indices, the search target might be a
+  // primary column value, and the iterator might return all primary keys that
+  // have the given column value; however, other semantics are also possible.
+  // For vector indices, the search target might be a vector, and the iterator
+  // might return similar vectors from the index.
+  virtual std::unique_ptr<Iterator> NewIterator(
+      const ReadOptions& read_options, Iterator* underlying_it) const = 0;
 };
 
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/secondary_index/faiss_ivf_index.cc b/utilities/secondary_index/faiss_ivf_index.cc
index 0ad11411951d..1f897c516978 100644
--- a/utilities/secondary_index/faiss_ivf_index.cc
+++ b/utilities/secondary_index/faiss_ivf_index.cc
@@ -210,4 +210,11 @@ Status FaissIVFIndex::GetSecondaryValue(
   return Status::OK();
 }
 
+std::unique_ptr<Iterator> FaissIVFIndex::NewIterator(
+    const ReadOptions& /* read_options */,
+    Iterator* /* underlying_it */) const {
+  // TODO: implement this
+  return std::unique_ptr<Iterator>(NewErrorIterator(Status::NotSupported()));
+}
+
 }  // namespace ROCKSDB_NAMESPACE
diff --git a/utilities/secondary_index/faiss_ivf_index.h b/utilities/secondary_index/faiss_ivf_index.h
index 78463c22cd4c..ab601552057f 100644
--- a/utilities/secondary_index/faiss_ivf_index.h
+++ b/utilities/secondary_index/faiss_ivf_index.h
@@ -43,6 +43,9 @@ class FaissIVFIndex : public SecondaryIndex {
       std::optional<std::variant<Slice, std::string>>* secondary_value)
       const override;
 
+  std::unique_ptr<Iterator> NewIterator(const ReadOptions& read_options,
+                                        Iterator* underlying_it) const override;
+
  private:
   class Adapter;
 
diff --git a/utilities/transactions/transaction_test.cc b/utilities/transactions/transaction_test.cc
index fd1715ea0f67..09083b0d8a68 100644
--- a/utilities/transactions/transaction_test.cc
+++ b/utilities/transactions/transaction_test.cc
@@ -26,6 +26,7 @@
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
 #include "test_util/transaction_test_util.h"
+#include "util/overload.h"
 #include "util/random.h"
 #include "util/string_util.h"
 #include "utilities/merge_operators.h"
@@ -8083,7 +8084,120 @@ TEST_P(TransactionTest, SecondaryIndex) {
       return Status::OK();
     }
 
+    std::unique_ptr<Iterator> NewIterator(
+        const ReadOptions& /* read_options */,
+        Iterator* underlying_it) const override {
+      return std::make_unique<FooIterator>(this, underlying_it);
+    }
+
    private:
+    class FooIterator : public Iterator {
+     public:
+      FooIterator(const SecondaryIndex* index, Iterator* underlying_it)
+          : index_(index), underlying_it_(underlying_it) {
+        assert(index_);
+        assert(underlying_it_);
+      }
+
+      bool Valid() const override {
+        return status_.ok() && underlying_it_->Valid() &&
+               underlying_it_->key().starts_with(prefix_);
+      }
+
+      void SeekToFirst() override {
+        status_ = Status::NotSupported("SeekToFirst");
+      }
+
+      void SeekToLast() override {
+        status_ = Status::NotSupported("SeekToLast");
+      }
+
+      void Seek(const Slice& target) override {
+        status_ = Status::OK();
+
+        std::variant<Slice, std::string> prefix;
+
+        const Status s = index_->GetSecondaryKeyPrefix(target, &prefix);
+        if (!s.ok()) {
+          status_ = s;
+          return;
+        }
+
+        prefix_ = std::visit(
+            overload{
+                [](const Slice& value) -> std::string {
+                  return value.ToString();
+                },
+                [](const std::string& value) -> std::string { return value; }},
+            prefix);
+
+        underlying_it_->Seek(prefix_);
+      }
+
+      void SeekForPrev(const Slice& /* target */) override {
+        status_ = Status::NotSupported("SeekForPrev");
+      }
+
+      void Next() override {
+        assert(Valid());
+
+        underlying_it_->Next();
+      }
+
+      void Prev() override {
+        assert(Valid());
+
+        underlying_it_->Prev();
+      }
+
+      bool PrepareValue() override {
+        assert(Valid());
+
+        return underlying_it_->PrepareValue();
+      }
+
+      Status status() const override {
+        if (!status_.ok()) {
+          return status_;
+        }
+
+        return underlying_it_->status();
+      }
+
+      Slice key() const override {
+        assert(Valid());
+
+        Slice key = underlying_it_->key();
+        key.remove_prefix(prefix_.size());
+
+        return key;
+      }
+
+      Slice value() const override {
+        assert(Valid());
+
+        return underlying_it_->value();
+      }
+
+      const WideColumns& columns() const override {
+        assert(Valid());
+
+        return underlying_it_->columns();
+      }
+
+      Slice timestamp() const override {
+        assert(Valid());
+
+        return Slice();
+      }
+
+     private:
+      const SecondaryIndex* index_;
+      Iterator* underlying_it_;
+      Status status_;
+      std::string prefix_;
+    };
+
     ColumnFamilyHandle* primary_cfh_{};
     ColumnFamilyHandle* secondary_cfh_{};
   };
@@ -8181,6 +8295,7 @@ TEST_P(TransactionTest, SecondaryIndex) {
   }
 
   {
+    // Read the raw secondary index entries from CF2
     std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions(), cfh2));
 
     it->SeekToFirst();
@@ -8198,6 +8313,58 @@ TEST_P(TransactionTest, SecondaryIndex) {
     ASSERT_OK(it->status());
   }
 
+  {
+    // Query the secondary index
+    std::unique_ptr<Iterator> underlying_it(
+        db->NewIterator(ReadOptions(), cfh2));
+    std::unique_ptr<Iterator> it(
+        index->NewIterator(ReadOptions(), underlying_it.get()));
+
+    it->SeekToFirst();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekToLast();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekForPrev("box");
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->Seek("box");  // last character used for indexing: x
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "zab");
+
+    it->Next();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key4");
+    ASSERT_EQ(it->value(), "xuuq");
+
+    it->Prev();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "zab");
+
+    it->Next();
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key4");
+    ASSERT_EQ(it->value(), "xuuq");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+
+    it->Seek("toy");  // last character used for indexing: y
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+  }
+
   // Make some updates to the key-values indexed above through the database
   // interface (i.e. using implicit transactions)
 
@@ -8256,6 +8423,7 @@ TEST_P(TransactionTest, SecondaryIndex) {
   }
 
   {
+    // Read the raw secondary index entries from CF2
     std::unique_ptr<Iterator> it(db->NewIterator(ReadOptions(), cfh2));
 
     it->SeekToFirst();
@@ -8272,6 +8440,46 @@ TEST_P(TransactionTest, SecondaryIndex) {
     ASSERT_FALSE(it->Valid());
     ASSERT_OK(it->status());
   }
+
+  {
+    // Query the secondary index
+    std::unique_ptr<Iterator> underlying_it(
+        db->NewIterator(ReadOptions(), cfh2));
+    std::unique_ptr<Iterator> it(
+        index->NewIterator(ReadOptions(), underlying_it.get()));
+
+    it->SeekToFirst();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekToLast();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->SeekForPrev("bot");
+    ASSERT_FALSE(it->Valid());
+    ASSERT_TRUE(it->status().IsNotSupported());
+
+    it->Seek("bot");  // last character used for indexing: t
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key1");
+    ASSERT_EQ(it->value(), "tluarg");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+
+    it->Seek("toy");  // last character used for indexing: y
+    ASSERT_TRUE(it->Valid());
+    ASSERT_OK(it->status());
+    ASSERT_EQ(it->key(), "key3");
+    ASSERT_EQ(it->value(), "ylprag");
+
+    it->Next();
+    ASSERT_FALSE(it->Valid());
+    ASSERT_OK(it->status());
+  }
 }
 
 TEST_F(TransactionDBTest, CollapseKey) {