From 8e3bab73f158d9ccd1d2969a716941818ad9bc5c Mon Sep 17 00:00:00 2001
From: Matthew Ahrens
Date: Tue, 8 Feb 2022 18:38:08 -0800
Subject: [PATCH] healing reads should not bypass object cache (#179)

The performance of healing reads is sometimes relevant. For example,
during `zpool import` we do healing reads of the blocks written in the
past few TXGs. The performance of `zpool scrub` may also matter.

The current behavior is to bypass all caching for healing reads,
including the object cache. However, this has a huge performance
impact, since we can see ~700x read inflation: we read every object
once for each block in it. This inflation is guaranteed for `zpool
import`, since the last few TXGs are necessarily densely packed into
objects.

This commit changes the behavior of healing reads to bypass only the
zettacache, not the object cache, by default. If the old behavior is
desired, the tunable can be changed, or the agent can be restarted to
clear the object cache. I would recommend that a later commit remove
the tunable and simplify the code.
---
 .../zettaobject/src/object_access.rs | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/cmd/zfs_object_agent/zettaobject/src/object_access.rs b/cmd/zfs_object_agent/zettaobject/src/object_access.rs
index 3e03ff71a7d1..fc5bd92787cf 100644
--- a/cmd/zfs_object_agent/zettaobject/src/object_access.rs
+++ b/cmd/zfs_object_agent/zettaobject/src/object_access.rs
@@ -53,6 +53,7 @@ lazy_static! {
     static ref LONG_OPERATION_DURATION: Duration =
         Duration::from_secs(get_tunable("long_operation_secs", 2));
     pub static ref OBJECT_DELETION_BATCH_SIZE: usize = get_tunable("object_deletion_batch_size", 1000);
+    pub static ref OBJECT_CACHE_IS_BYPASSABLE: bool = get_tunable("object_cache_is_bypassable", false);
 }
 
 #[derive(Debug, Enum, Copy, Clone)]
@@ -542,14 +543,18 @@ impl ObjectAccess {
         key: String,
         stat_type: ObjectAccessStatType,
     ) -> Result<Bytes> {
-        let bytes = self.get_object_impl(key.clone(), stat_type, None).await?;
-        // Note: we *should* have the same data from S3 (in the `vec`) and in
-        // the cache, so this invalidation is normally not necessary. However,
-        // in case a bug (or undetected RAM error) resulted in incorrect cached
-        // data, we want to invalidate the cache so that we won't get the bad
-        // cached data again.
-        Self::invalidate_cache(key);
-        Ok(bytes)
+        if *OBJECT_CACHE_IS_BYPASSABLE {
+            let bytes = self.get_object_impl(key.clone(), stat_type, None).await?;
+            // Note: we *should* have the same data from S3 (in the `vec`) and in
+            // the cache, so this invalidation is normally not necessary. However,
+            // in case a bug (or undetected RAM error) resulted in incorrect cached
+            // data, we want to invalidate the cache so that we won't get the bad
+            // cached data again.
+            Self::invalidate_cache(key);
+            Ok(bytes)
+        } else {
+            self.get_object(key, stat_type).await
+        }
     }
 
     pub async fn get_object(&self, key: String, stat_type: ObjectAccessStatType) -> Result<Bytes> {
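
Aside for reviewers (not part of the patch): the change boils down to a
tunable-gated choice between two read paths, which can be illustrated
outside the agent. Below is a minimal, self-contained sketch in plain
(non-async) Rust; ObjectStore, fetch_from_backend, and the
environment-variable stand-in for get_tunable() are hypothetical names
for illustration only, not the agent's real API.

// healing_read_sketch.rs: minimal model of the tunable-gated cache
// bypass. The real code is async and lives in object_access.rs.
use std::collections::HashMap;
use std::sync::Mutex;

// Stand-in for get_tunable("object_cache_is_bypassable", false):
// defaults to false, matching the patch's new default behavior.
fn object_cache_is_bypassable() -> bool {
    std::env::var("OBJECT_CACHE_IS_BYPASSABLE").map_or(false, |v| v == "true")
}

struct ObjectStore {
    cache: Mutex<HashMap<String, Vec<u8>>>,
}

impl ObjectStore {
    // Stand-in for get_object_impl(): always goes to backing storage.
    fn fetch_from_backend(&self, key: &str) -> Vec<u8> {
        format!("object-data-for-{key}").into_bytes()
    }

    // Normal read path: served from the object cache when possible.
    fn get_object(&self, key: &str) -> Vec<u8> {
        let mut cache = self.cache.lock().unwrap();
        if let Some(bytes) = cache.get(key) {
            return bytes.clone();
        }
        let bytes = self.fetch_from_backend(key);
        cache.insert(key.to_string(), bytes.clone());
        bytes
    }

    // Healing read path: with the tunable set, bypass the cache and
    // invalidate any (possibly corrupt) cached copy; otherwise fall back
    // to the cached read, avoiding the ~700x read inflation described in
    // the commit message.
    fn get_object_uncached(&self, key: &str) -> Vec<u8> {
        if object_cache_is_bypassable() {
            let bytes = self.fetch_from_backend(key);
            self.cache.lock().unwrap().remove(key);
            bytes
        } else {
            self.get_object(key)
        }
    }
}

fn main() {
    let store = ObjectStore { cache: Mutex::new(HashMap::new()) };
    // With OBJECT_CACHE_IS_BYPASSABLE unset, repeated healing reads of
    // the same object hit the cache instead of re-fetching per block.
    let first = store.get_object_uncached("s0/object-123");
    let second = store.get_object_uncached("s0/object-123");
    assert_eq!(first, second);
}

In the sketch, the default leaves healing reads on the cached path,
mirroring this patch's behavior change; setting the hypothetical
OBJECT_CACHE_IS_BYPASSABLE variable restores the old bypass-and-
invalidate semantics.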