From c4b8273397251c1050a8f240d8ceec44e41ed06c Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Fri, 1 Jul 2022 02:32:01 -0700 Subject: [PATCH 01/11] Eliminate the copying of blobs when serving reads from the cache Summary: The blob cache enables an optimization on the read path: when a blob is found in the cache, we can avoid copying it into the buffer provided by the application. Instead, we can simply transfer ownership of the cache handle to the target `PinnableSlice`. (Note: this relies on the `Cleanable` interface, which is implemented by `PinnableSlice`.) This has the potential to save a lot of CPU, especially with large blob values. --- db/blob/blob_source.cc | 12 +++++++++++- include/rocksdb/slice.h | 2 ++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 2b09930a0cf..5930e87ae57 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -134,7 +134,17 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, Slice key = cache_key.AsSlice(); s = GetBlobFromCache(key, &blob_entry); if (s.ok() && blob_entry.GetValue()) { - value->PinSelf(*blob_entry.GetValue()); + { + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + value->data_ = blob_entry.GetValue()->data(); + value->size_ = blob_entry.GetValue()->size(); + value->Pin(); + assert(value->IsPinned()); + blob_entry.TransferTo(value); + } // For consistency, the size of on-disk (possibly compressed) blob record // is assigned to bytes_read. diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 3722fc4e637..299e3b8bf26 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -212,6 +212,8 @@ class PinnableSlice : public Slice, public Cleanable { inline bool IsPinned() const { return pinned_; } + inline void Pin() { pinned_ = true; } + private: friend class PinnableSlice4Test; std::string self_space_; From 10f91dc554a0d56e36da486410adc38bc98b62cb Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Fri, 1 Jul 2022 02:32:01 -0700 Subject: [PATCH 02/11] Follow the comments --- db/blob/blob_source.cc | 11 ++++++----- table/block_based/cachable_entry.h | 10 ++++++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 5930e87ae57..701dcb41f18 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -139,10 +139,6 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, // application, we can simply transfer ownership of the cache handle to // the target PinnableSlice. This has the potential to save a lot of // CPU, especially with large blob values. - value->data_ = blob_entry.GetValue()->data(); - value->size_ = blob_entry.GetValue()->size(); - value->Pin(); - assert(value->IsPinned()); blob_entry.TransferTo(value); } @@ -275,7 +271,12 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, if (s.ok() && blob_entry.GetValue()) { assert(req.status); *req.status = s; - req.result->PinSelf(*blob_entry.GetValue()); + + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + blob_entry.TransferTo(req.result); // Update the counter for the number of valid blobs read from the cache. ++cached_blob_count; diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index eb51e98daac..05eaae76fea 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -231,4 +231,14 @@ class CachableEntry { bool own_value_ = false; }; +template <> +void CachableEntry::TransferTo(Cleanable* cleanable) { + auto* slice = reinterpret_cast(cleanable); + slice->data_ = value_->data(); + slice->size_ = value_->size(); + slice->Pin(); + assert(slice->IsPinned()); + TransferTo(slice); +} + } // namespace ROCKSDB_NAMESPACE From 6272052eb6081530ebdffb8e9e61402dd949074a Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 14:33:37 -0700 Subject: [PATCH 03/11] Cleanup --- db/blob/blob_source.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 701dcb41f18..74ee862543f 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -134,13 +134,11 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, Slice key = cache_key.AsSlice(); s = GetBlobFromCache(key, &blob_entry); if (s.ok() && blob_entry.GetValue()) { - { - // To avoid copying the cached blob into the buffer provided by the - // application, we can simply transfer ownership of the cache handle to - // the target PinnableSlice. This has the potential to save a lot of - // CPU, especially with large blob values. - blob_entry.TransferTo(value); - } + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + blob_entry.TransferTo(value); // For consistency, the size of on-disk (possibly compressed) blob record // is assigned to bytes_read. From 3cc692e4aaf1098da4a48c33326f37cac0d755d8 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 14:37:45 -0700 Subject: [PATCH 04/11] Change to static_cast --- table/block_based/cachable_entry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index 05eaae76fea..d66e1e109cc 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -233,7 +233,7 @@ class CachableEntry { template <> void CachableEntry::TransferTo(Cleanable* cleanable) { - auto* slice = reinterpret_cast(cleanable); + auto* slice = static_cast(cleanable); slice->data_ = value_->data(); slice->size_ = value_->size(); slice->Pin(); From 1648db1a048dbde7bfa71d3f3476ad0e436e8401 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 15:13:11 -0700 Subject: [PATCH 05/11] Add new TransferTo --- table/block_based/cachable_entry.h | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index d66e1e109cc..2ea78d6f668 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -119,6 +119,24 @@ class CachableEntry { ResetFields(); } + void TransferTo(PinnableSlice* slice) { + if (slice) { + slice->data_ = value_->data(); + slice->size_ = value_->size(); + slice->Pin(); + assert(slice->IsPinned()); + + if (cache_handle_ != nullptr) { + assert(cache_ != nullptr); + slice->RegisterCleanup(&ReleaseCacheHandle, cache_, cache_handle_); + } else if (own_value_) { + slice->RegisterCleanup(&DeleteValue, value_, nullptr); + } + } + + ResetFields(); + } + void TransferTo(Cleanable* cleanable) { if (cleanable) { if (cache_handle_ != nullptr) { @@ -229,16 +247,6 @@ class CachableEntry { Cache* cache_ = nullptr; Cache::Handle* cache_handle_ = nullptr; bool own_value_ = false; -}; - -template <> -void CachableEntry::TransferTo(Cleanable* cleanable) { - auto* slice = static_cast(cleanable); - slice->data_ = value_->data(); - slice->size_ = value_->size(); - slice->Pin(); - assert(slice->IsPinned()); - TransferTo(slice); -} +}; } // namespace ROCKSDB_NAMESPACE From 4514dae2d4c0befd1e0c0d39ebde71ff73dbf9ba Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 15:15:01 -0700 Subject: [PATCH 06/11] Fix clang-format --- table/block_based/cachable_entry.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index 2ea78d6f668..a131f448fe8 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -247,6 +247,6 @@ class CachableEntry { Cache* cache_ = nullptr; Cache::Handle* cache_handle_ = nullptr; bool own_value_ = false; -}; +}; } // namespace ROCKSDB_NAMESPACE From 01c1d8fdaa46db695b86c476e8fc65d7633fd7bb Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 15:50:38 -0700 Subject: [PATCH 07/11] Follow the comments --- include/rocksdb/slice.h | 2 -- table/block_based/cachable_entry.h | 10 +++------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/include/rocksdb/slice.h b/include/rocksdb/slice.h index 299e3b8bf26..3722fc4e637 100644 --- a/include/rocksdb/slice.h +++ b/include/rocksdb/slice.h @@ -212,8 +212,6 @@ class PinnableSlice : public Slice, public Cleanable { inline bool IsPinned() const { return pinned_; } - inline void Pin() { pinned_ = true; } - private: friend class PinnableSlice4Test; std::string self_space_; diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index a131f448fe8..9f9ff953750 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -121,16 +121,12 @@ class CachableEntry { void TransferTo(PinnableSlice* slice) { if (slice) { - slice->data_ = value_->data(); - slice->size_ = value_->size(); - slice->Pin(); - assert(slice->IsPinned()); - + slice->Reset(); if (cache_handle_ != nullptr) { assert(cache_ != nullptr); - slice->RegisterCleanup(&ReleaseCacheHandle, cache_, cache_handle_); + slice->PinSlice(*value_, &ReleaseCacheHandle, cache_, cache_handle_); } else if (own_value_) { - slice->RegisterCleanup(&DeleteValue, value_, nullptr); + slice->PinSlice(*value_, &DeleteValue, value_, nullptr); } } From 008bc9b92574798bf82621fbf6bea317b551f0f1 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 17:02:45 -0700 Subject: [PATCH 08/11] Follow the comments --- table/block_based/cachable_entry.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index 9f9ff953750..0ee4fcdce6d 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -120,13 +120,18 @@ class CachableEntry { } void TransferTo(PinnableSlice* slice) { - if (slice) { - slice->Reset(); - if (cache_handle_ != nullptr) { - assert(cache_ != nullptr); - slice->PinSlice(*value_, &ReleaseCacheHandle, cache_, cache_handle_); - } else if (own_value_) { - slice->PinSlice(*value_, &DeleteValue, value_, nullptr); + if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || std::is_same_v) { + if (slice) { + slice->Reset(); + if (cache_handle_ != nullptr) { + assert(cache_ != nullptr); + slice->PinSlice(*value_, &ReleaseCacheHandle, cache_, cache_handle_); + } else if (own_value_) { + slice->PinSlice(*value_, &DeleteValue, value_, nullptr); + } } } From 558087edece7a020f286519f5c7a358388be5c26 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Tue, 5 Jul 2022 19:25:27 -0700 Subject: [PATCH 09/11] Cleanup --- table/block_based/cachable_entry.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index 0ee4fcdce6d..0b6677fb25d 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -120,10 +120,7 @@ class CachableEntry { } void TransferTo(PinnableSlice* slice) { - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || std::is_same_v) { + if constexpr (std::is_same_v) { if (slice) { slice->Reset(); if (cache_handle_ != nullptr) { From 4c75c3294ea6765621554f9f8e6e1c3bb01d659d Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Wed, 6 Jul 2022 14:23:32 -0700 Subject: [PATCH 10/11] Follow the comments --- cache/cache_helpers.h | 20 ++++++++ db/blob/blob_source.cc | 82 +++++++++++++++++++----------- db/blob/blob_source.h | 4 +- table/block_based/cachable_entry.h | 16 ------ 4 files changed, 75 insertions(+), 47 deletions(-) diff --git a/cache/cache_helpers.h b/cache/cache_helpers.h index 4b784939613..e43e65e6244 100644 --- a/cache/cache_helpers.h +++ b/cache/cache_helpers.h @@ -84,6 +84,16 @@ class CacheHandleGuard { Cache::Handle* GetCacheHandle() const { return handle_; } T* GetValue() const { return value_; } + void TransferTo(Cleanable* cleanable) { + if (cleanable) { + if (handle_ != nullptr) { + assert(cache_); + cleanable->RegisterCleanup(&ReleaseHandle, cache_, handle_); + } + } + ResetFields(); + } + void Reset() { ReleaseHandle(); ResetFields(); @@ -105,6 +115,16 @@ class CacheHandleGuard { value_ = nullptr; } + static void ReleaseHandle(void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + assert(cache); + + Cache::Handle* const cache_handle = static_cast(arg2); + assert(cache_handle); + + cache->Release(cache_handle); + } + private: Cache* cache_ = nullptr; Cache::Handle* handle_ = nullptr; diff --git a/db/blob/blob_source.cc b/db/blob/blob_source.cc index 74ee862543f..b09a2a72928 100644 --- a/db/blob/blob_source.cc +++ b/db/blob/blob_source.cc @@ -30,7 +30,7 @@ BlobSource::BlobSource(const ImmutableOptions* immutable_options, BlobSource::~BlobSource() = default; Status BlobSource::GetBlobFromCache(const Slice& cache_key, - CachableEntry* blob) const { + CacheHandleGuard* blob) const { assert(blob); assert(blob->IsEmpty()); assert(blob_cache_); @@ -39,9 +39,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key, Cache::Handle* cache_handle = nullptr; cache_handle = GetEntryFromCache(cache_key); if (cache_handle != nullptr) { - blob->SetCachedValue( - static_cast(blob_cache_->Value(cache_handle)), - blob_cache_.get(), cache_handle); + *blob = CacheHandleGuard(blob_cache_.get(), cache_handle); return Status::OK(); } @@ -51,7 +49,7 @@ Status BlobSource::GetBlobFromCache(const Slice& cache_key, } Status BlobSource::PutBlobIntoCache(const Slice& cache_key, - CachableEntry* cached_blob, + CacheHandleGuard* cached_blob, PinnableSlice* blob) const { assert(blob); assert(!cache_key.empty()); @@ -74,7 +72,8 @@ Status BlobSource::PutBlobIntoCache(const Slice& cache_key, priority); if (s.ok()) { assert(cache_handle != nullptr); - cached_blob->SetCachedValue(buf, blob_cache_.get(), cache_handle); + *cached_blob = + CacheHandleGuard(blob_cache_.get(), cache_handle); } return s; @@ -125,20 +124,32 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, const CacheKey cache_key = GetCacheKey(file_number, file_size, offset); - CachableEntry blob_entry; + CacheHandleGuard blob_handle; // First, try to get the blob from the cache // // If blob cache is enabled, we'll try to read from it. if (blob_cache_) { Slice key = cache_key.AsSlice(); - s = GetBlobFromCache(key, &blob_entry); - if (s.ok() && blob_entry.GetValue()) { - // To avoid copying the cached blob into the buffer provided by the - // application, we can simply transfer ownership of the cache handle to - // the target PinnableSlice. This has the potential to save a lot of - // CPU, especially with large blob values. - blob_entry.TransferTo(value); + s = GetBlobFromCache(key, &blob_handle); + if (s.ok() && blob_handle.GetValue()) { + { + value->Reset(); + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle to + // the target PinnableSlice. This has the potential to save a lot of + // CPU, especially with large blob values. + value->PinSlice( + *blob_handle.GetValue(), + [](void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + Cache::Handle* const handle = static_cast(arg2); + cache->Release(handle); + }, + blob_handle.GetCache(), blob_handle.GetCacheHandle()); + // Make the CacheHandleGuard relinquish ownership of the handle. + blob_handle.TransferTo(nullptr); + } // For consistency, the size of on-disk (possibly compressed) blob record // is assigned to bytes_read. @@ -157,7 +168,7 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, } } - assert(blob_entry.IsEmpty()); + assert(blob_handle.IsEmpty()); const bool no_io = read_options.read_tier == kBlockCacheTier; if (no_io) { @@ -196,7 +207,7 @@ Status BlobSource::GetBlob(const ReadOptions& read_options, // If filling cache is allowed and a cache is configured, try to put the // blob to the cache. Slice key = cache_key.AsSlice(); - s = PutBlobIntoCache(key, &blob_entry, value); + s = PutBlobIntoCache(key, &blob_handle, value); if (!s.ok()) { return s; } @@ -260,24 +271,37 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, for (size_t i = 0; i < num_blobs; ++i) { auto& req = blob_reqs[i]; - CachableEntry blob_entry; + CacheHandleGuard blob_handle; const CacheKey cache_key = base_cache_key.WithOffset(req.offset); const Slice key = cache_key.AsSlice(); - const Status s = GetBlobFromCache(key, &blob_entry); + const Status s = GetBlobFromCache(key, &blob_handle); - if (s.ok() && blob_entry.GetValue()) { + if (s.ok() && blob_handle.GetValue()) { assert(req.status); *req.status = s; - // To avoid copying the cached blob into the buffer provided by the - // application, we can simply transfer ownership of the cache handle to - // the target PinnableSlice. This has the potential to save a lot of - // CPU, especially with large blob values. - blob_entry.TransferTo(req.result); + { + req.result->Reset(); + // To avoid copying the cached blob into the buffer provided by the + // application, we can simply transfer ownership of the cache handle + // to the target PinnableSlice. This has the potential to save a lot + // of CPU, especially with large blob values. + req.result->PinSlice( + *blob_handle.GetValue(), + [](void* arg1, void* arg2) { + Cache* const cache = static_cast(arg1); + Cache::Handle* const handle = static_cast(arg2); + cache->Release(handle); + }, + blob_handle.GetCache(), blob_handle.GetCacheHandle()); + // Make the CacheHandleGuard relinquish ownership of the handle. + blob_handle.TransferTo(nullptr); + } // Update the counter for the number of valid blobs read from the cache. ++cached_blob_count; + // For consistency, the size of each on-disk (possibly compressed) blob // record is accumulated to total_bytes. uint64_t adjustment = @@ -344,11 +368,11 @@ void BlobSource::MultiGetBlobFromOneFile(const ReadOptions& read_options, // the blob(s) to the cache. for (size_t i = 0; i < _blob_reqs.size(); ++i) { if (_blob_reqs[i]->status->ok()) { - CachableEntry blob_entry; + CacheHandleGuard blob_handle; const CacheKey cache_key = base_cache_key.WithOffset(_blob_reqs[i]->offset); const Slice key = cache_key.AsSlice(); - s = PutBlobIntoCache(key, &blob_entry, _blob_reqs[i]->result); + s = PutBlobIntoCache(key, &blob_handle, _blob_reqs[i]->result); if (!s.ok()) { *_blob_reqs[i]->status = s; } @@ -368,10 +392,10 @@ bool BlobSource::TEST_BlobInCache(uint64_t file_number, uint64_t file_size, const CacheKey cache_key = GetCacheKey(file_number, file_size, offset); const Slice key = cache_key.AsSlice(); - CachableEntry blob_entry; - const Status s = GetBlobFromCache(key, &blob_entry); + CacheHandleGuard blob_handle; + const Status s = GetBlobFromCache(key, &blob_handle); - if (s.ok() && blob_entry.GetValue() != nullptr) { + if (s.ok() && blob_handle.GetValue() != nullptr) { return true; } diff --git a/db/blob/blob_source.h b/db/blob/blob_source.h index aa46311c12b..779f8d00fde 100644 --- a/db/blob/blob_source.h +++ b/db/blob/blob_source.h @@ -104,10 +104,10 @@ class BlobSource { private: Status GetBlobFromCache(const Slice& cache_key, - CachableEntry* blob) const; + CacheHandleGuard* blob) const; Status PutBlobIntoCache(const Slice& cache_key, - CachableEntry* cached_blob, + CacheHandleGuard* cached_blob, PinnableSlice* blob) const; Cache::Handle* GetEntryFromCache(const Slice& key) const; diff --git a/table/block_based/cachable_entry.h b/table/block_based/cachable_entry.h index 0b6677fb25d..eb51e98daac 100644 --- a/table/block_based/cachable_entry.h +++ b/table/block_based/cachable_entry.h @@ -119,22 +119,6 @@ class CachableEntry { ResetFields(); } - void TransferTo(PinnableSlice* slice) { - if constexpr (std::is_same_v) { - if (slice) { - slice->Reset(); - if (cache_handle_ != nullptr) { - assert(cache_ != nullptr); - slice->PinSlice(*value_, &ReleaseCacheHandle, cache_, cache_handle_); - } else if (own_value_) { - slice->PinSlice(*value_, &DeleteValue, value_, nullptr); - } - } - } - - ResetFields(); - } - void TransferTo(Cleanable* cleanable) { if (cleanable) { if (cache_handle_ != nullptr) { From 3369e7eb99086d9f0ddcd7266cf942e7dd1f82e6 Mon Sep 17 00:00:00 2001 From: Gang Liao Date: Wed, 6 Jul 2022 15:07:11 -0700 Subject: [PATCH 11/11] Update ReleaseCacheHandle() --- cache/cache_helpers.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cache/cache_helpers.h b/cache/cache_helpers.h index e43e65e6244..7ea2365b88b 100644 --- a/cache/cache_helpers.h +++ b/cache/cache_helpers.h @@ -88,7 +88,7 @@ class CacheHandleGuard { if (cleanable) { if (handle_ != nullptr) { assert(cache_); - cleanable->RegisterCleanup(&ReleaseHandle, cache_, handle_); + cleanable->RegisterCleanup(&ReleaseCacheHandle, cache_, handle_); } } ResetFields(); @@ -115,7 +115,7 @@ class CacheHandleGuard { value_ = nullptr; } - static void ReleaseHandle(void* arg1, void* arg2) { + static void ReleaseCacheHandle(void* arg1, void* arg2) { Cache* const cache = static_cast(arg1); assert(cache);