-
Notifications
You must be signed in to change notification settings - Fork 6.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Write-side support for FAISS IVF indices
Summary: The patch adds initial support for backing FAISS's inverted file based indices with data stored in RocksDB. It introduces a `SecondaryIndex` implementation called `FaissIVFIndex` which takes ownership of a `faiss::IndexIVF` object. During indexing, `FaissIVFIndex` treats the original value of the specified primary column as an embedding vector, and passes it to the provided FAISS index object to perform quantization. It replaces the original embedding vector with the result of the coarse quantizer (i.e. the inverted list id), and puts the result of the fine quantizer (if any) into the secondary index value. Note that this patch is only one half of the equation; it provides a way of storing FAISS inverted lists in RocksDB but there is currently no retrieval/search support (this will be a follow-up change). Also, the integration currently works only with our internal Buck build. I plan to add support for `cmake` / `make` based builds similarly to how we handle Folly. Differential Revision: D66907065
- Loading branch information
1 parent
31408c0
commit b1325ab
Showing
6 changed files
with
431 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
// Copyright (c) Meta Platforms, Inc. and affiliates. | ||
// This source code is licensed under both the GPLv2 (found in the | ||
// COPYING file in the root directory) and Apache 2.0 License | ||
// (found in the LICENSE.Apache file in the root directory). | ||
|
||
#include "utilities/secondary_index/faiss_ivf_index.h"

#include <cassert>
#include <exception>

#include "faiss/invlists/InvertedLists.h"
#include "util/coding.h"
|
||
namespace ROCKSDB_NAMESPACE { | ||
|
||
// Minimal faiss::InvertedLists implementation used by FaissIVFIndex to
// intercept the output of the fine quantizer. The only operation that does
// real work is add_entry(), which copies the encoded vector into a
// caller-provided std::string instead of storing it in an in-memory list.
// Every other operation is a stub that asserts in debug builds.
class FaissIVFIndex::Adapter : public faiss::InvertedLists {
 public:
  Adapter(size_t num_lists, size_t code_size)
      : faiss::InvertedLists(num_lists, code_size) {
    // Select the iterator-based read interface; the non-iterator accessors
    // below are therefore not expected to be called.
    use_iterator = true;
  }

  // --- Non-iterator-based read interface (unused since use_iterator is set)

  size_t list_size(size_t /* list_no */) const override {
    assert(false);
    return 0;
  }

  const uint8_t* get_codes(size_t /* list_no */) const override {
    assert(false);
    return nullptr;
  }

  const faiss::idx_t* get_ids(size_t /* list_no */) const override {
    assert(false);
    return nullptr;
  }

  // --- Iterator-based read interface

  // TODO: implement this. Until then, asserts in debug builds and returns
  // nullptr.
  faiss::InvertedListsIterator* get_iterator(
      size_t /* list_no */,
      void* /* inverted_list_context */ = nullptr) const override {
    assert(false);
    return nullptr;
  }

  // --- Write interface (only add_entry is implemented/required for now)

  // Copies the code_size bytes starting at `code` into the std::string that
  // `inverted_list_context` points to, replacing its previous contents. The
  // list number and id are ignored. Always returns 0.
  size_t add_entry(size_t /* list_no */, faiss::idx_t /* id */,
                   const uint8_t* code,
                   void* inverted_list_context = nullptr) override {
    auto* const out = static_cast<std::string*>(inverted_list_context);
    assert(out);

    const char* const first_byte = reinterpret_cast<const char*>(code);
    out->assign(first_byte, code_size);

    return 0;
  }

  size_t add_entries(size_t /* list_no */, size_t /* num_entries */,
                     const faiss::idx_t* /* ids */,
                     const uint8_t* /* code */) override {
    assert(false);
    return 0;
  }

  void update_entry(size_t /* list_no */, size_t /* offset */,
                    faiss::idx_t /* id */, const uint8_t* /* code */) override {
    assert(false);
  }

  void update_entries(size_t /* list_no */, size_t /* offset */,
                      size_t /* num_entries */, const faiss::idx_t* /* ids */,
                      const uint8_t* /* code */) override {
    assert(false);
  }

  void resize(size_t /* list_no */, size_t /* new_size */) override {
    assert(false);
  }
};
|
||
// Takes ownership of `index` and redirects its inverted lists to an Adapter
// owned by this object.
//
// index:               the FAISS IVF index to wrap; must be non-null and have
//                      a coarse quantizer (checked with assert).
// primary_column_name: name of the primary column this index applies to.
FaissIVFIndex::FaissIVFIndex(std::unique_ptr<faiss::IndexIVF>&& index,
                             std::string primary_column_name)
    : index_(std::move(index)),
      primary_column_name_(std::move(primary_column_name)) {
  // Validate the index *before* the first dereference. The adapter is
  // created here rather than in the member initializer list so that the
  // asserts actually guard the reads of nlist/code_size below.
  assert(index_);
  assert(index_->quantizer);

  adapter_ = std::make_unique<Adapter>(index_->nlist, index_->code_size);

  index_->replace_invlists(adapter_.get());
}
|
||
// Defaulted here rather than in the header: the header only forward-declares
// Adapter but holds it in a std::unique_ptr member, so the destructor has to
// be defined where Adapter is a complete type.
FaissIVFIndex::~FaissIVFIndex() = default; | ||
|
||
// Records the column family holding the primary data. `column_family` must be
// non-null (checked with assert); the handle is stored as-is, not copied.
void FaissIVFIndex::SetPrimaryColumnFamily(ColumnFamilyHandle* column_family) {
  assert(column_family != nullptr);

  primary_column_family_ = column_family;
}
|
||
// Records the column family holding the secondary index entries.
// `column_family` must be non-null (checked with assert); the handle is stored
// as-is, not copied.
void FaissIVFIndex::SetSecondaryColumnFamily(
    ColumnFamilyHandle* column_family) {
  assert(column_family != nullptr);

  secondary_column_family_ = column_family;
}
|
||
// Returns the handle last passed to SetPrimaryColumnFamily(), or nullptr if it
// has not been set.
ColumnFamilyHandle* FaissIVFIndex::GetPrimaryColumnFamily() const {
  ColumnFamilyHandle* const primary = primary_column_family_;
  return primary;
}
|
||
// Returns the handle last passed to SetSecondaryColumnFamily(), or nullptr if
// it has not been set.
ColumnFamilyHandle* FaissIVFIndex::GetSecondaryColumnFamily() const {
  ColumnFamilyHandle* const secondary = secondary_column_family_;
  return secondary;
}
|
||
// Returns a non-owning view of the primary column name supplied at
// construction. The view refers to storage owned by this object.
Slice FaissIVFIndex::GetPrimaryColumnName() const {
  return Slice(primary_column_name_);
}
|
||
// Runs the coarse quantizer on the embedding stored in the primary column and
// replaces the column value with the resulting inverted list id.
//
// primary_column_value: the raw embedding; must be exactly index_->d floats.
// updated_column_value: out-parameter (must be non-null); on success, set to
//                       the label encoded with PutVarsignedint64.
//
// Returns InvalidArgument if the embedding has the wrong size, if FAISS throws
// while assigning the vector, or if the quantizer yields a label outside
// [0, nlist). Returns OK otherwise.
Status FaissIVFIndex::UpdatePrimaryColumnValue(
    const Slice& /* primary_key */, const Slice& primary_column_value,
    std::optional<std::variant<Slice, std::string>>* updated_column_value)
    const {
  assert(updated_column_value);

  if (primary_column_value.size() != index_->d * sizeof(float)) {
    return Status::InvalidArgument(
        "Incorrectly sized vector passed to FaissIVFIndex");
  }

  constexpr faiss::idx_t n = 1;
  faiss::idx_t label = -1;

  // FAISS signals failures via C++ exceptions; translate them into a Status
  // instead of letting them escape through an API that reports errors by
  // return value.
  try {
    index_->quantizer->assign(
        n, reinterpret_cast<const float*>(primary_column_value.data()), &label);
  } catch (const std::exception& e) {
    return Status::InvalidArgument(e.what());
  }

  // A negative or out-of-range label does not identify an inverted list, so
  // it must not be persisted as the new column value.
  if (label < 0 || static_cast<size_t>(label) >= index_->nlist) {
    return Status::InvalidArgument(
        "Unexpected label returned by coarse quantizer");
  }

  std::string label_str;
  PutVarsignedint64(&label_str, label);

  updated_column_value->emplace(std::move(label_str));

  return Status::OK();
}
|
||
// Uses the (already updated) primary column value verbatim as the secondary
// key prefix. The result is stored as a Slice, i.e. a non-owning view of
// `primary_column_value`. `secondary_key_prefix` must be non-null. Always
// returns OK.
Status FaissIVFIndex::GetSecondaryKeyPrefix(
    const Slice& /* primary_key */, const Slice& primary_column_value,
    std::variant<Slice, std::string>* secondary_key_prefix) const {
  assert(secondary_key_prefix);

  secondary_key_prefix->emplace<Slice>(primary_column_value);

  return Status::OK();
}
|
||
// Runs the fine quantizer on the original embedding and returns the encoded
// vector as the secondary index value.
//
// primary_column_value:  the inverted list id, encoded as a varsignedint64.
// original_column_value: the raw embedding; must be exactly index_->d floats.
// secondary_value:       out-parameter (must be non-null); on success, set to
//                        the code captured by Adapter::add_entry.
//
// Returns InvalidArgument if the label cannot be decoded or lies outside
// [0, nlist), if the embedding has the wrong size, if FAISS throws while
// encoding, or if the captured code does not have the index's code size.
// Returns OK otherwise.
Status FaissIVFIndex::GetSecondaryValue(
    const Slice& /* primary_key */, const Slice& primary_column_value,
    const Slice& original_column_value,
    std::optional<std::variant<Slice, std::string>>* secondary_value) const {
  assert(secondary_value);

  // Decode and validate the label at runtime; an assert alone would let a
  // malformed value through in release builds.
  Slice label_slice = primary_column_value;
  faiss::idx_t label = -1;
  if (!GetVarsignedint64(&label_slice, &label)) {
    return Status::InvalidArgument(
        "Malformed label passed to FaissIVFIndex");
  }

  if (label < 0 || static_cast<size_t>(label) >= index_->nlist) {
    return Status::InvalidArgument(
        "Out-of-range label passed to FaissIVFIndex");
  }

  // add_core is handed a bare float pointer, so the buffer length has to be
  // verified here to rule out reading past the end of the slice.
  if (original_column_value.size() != index_->d * sizeof(float)) {
    return Status::InvalidArgument(
        "Incorrectly sized vector passed to FaissIVFIndex");
  }

  constexpr faiss::idx_t n = 1;
  constexpr faiss::idx_t* xids = nullptr;
  std::string codes;

  // FAISS signals failures via C++ exceptions; translate them into a Status
  // instead of letting them escape. `codes` is passed as the inverted list
  // context and is filled in by Adapter::add_entry.
  try {
    index_->add_core(
        n, reinterpret_cast<const float*>(original_column_value.data()), xids,
        &label, &codes);
  } catch (const std::exception& e) {
    return Status::InvalidArgument(e.what());
  }

  // If add_entry was never invoked, `codes` is still empty; do not store a
  // value that is not a well-formed code.
  if (codes.size() != index_->code_size) {
    return Status::InvalidArgument(
        "Unexpected code returned by fine quantizer");
  }

  secondary_value->emplace(std::move(codes));

  return Status::OK();
}
|
||
} // namespace ROCKSDB_NAMESPACE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// Copyright (c) Meta Platforms, Inc. and affiliates. | ||
// | ||
// This source code is licensed under both the GPLv2 (found in the | ||
// COPYING file in the root directory) and Apache 2.0 License | ||
// (found in the LICENSE.Apache file in the root directory). | ||
|
||
#pragma once | ||
|
||
#include <memory> | ||
|
||
#include "faiss/IndexIVF.h" | ||
#include "rocksdb/utilities/secondary_index.h" | ||
|
||
namespace ROCKSDB_NAMESPACE { | ||
|
||
// A SecondaryIndex implementation that wraps a FAISS inverted file index. | ||
class FaissIVFIndex : public SecondaryIndex { | ||
public: | ||
explicit FaissIVFIndex(std::unique_ptr<faiss::IndexIVF>&& index, | ||
std::string primary_column_name); | ||
~FaissIVFIndex() override; | ||
|
||
void SetPrimaryColumnFamily(ColumnFamilyHandle* column_family) override; | ||
void SetSecondaryColumnFamily(ColumnFamilyHandle* column_family) override; | ||
|
||
ColumnFamilyHandle* GetPrimaryColumnFamily() const override; | ||
ColumnFamilyHandle* GetSecondaryColumnFamily() const override; | ||
|
||
Slice GetPrimaryColumnName() const override; | ||
|
||
Status UpdatePrimaryColumnValue( | ||
const Slice& primary_key, const Slice& primary_column_value, | ||
std::optional<std::variant<Slice, std::string>>* updated_column_value) | ||
const override; | ||
|
||
Status GetSecondaryKeyPrefix( | ||
const Slice& primary_key, const Slice& primary_column_value, | ||
std::variant<Slice, std::string>* secondary_key_prefix) const override; | ||
|
||
Status GetSecondaryValue(const Slice& primary_key, | ||
const Slice& primary_column_value, | ||
const Slice& original_column_value, | ||
std::optional<std::variant<Slice, std::string>>* | ||
secondary_value) const override; | ||
|
||
private: | ||
class Adapter; | ||
|
||
std::unique_ptr<Adapter> adapter_; | ||
std::unique_ptr<faiss::IndexIVF> index_; | ||
std::string primary_column_name_; | ||
ColumnFamilyHandle* primary_column_family_{}; | ||
ColumnFamilyHandle* secondary_column_family_{}; | ||
}; | ||
|
||
} // namespace ROCKSDB_NAMESPACE |
Oops, something went wrong.