From 4dc60f0bd265881abdc881c555f9eb4269b5dd1c Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Fri, 18 Oct 2024 10:51:53 +0900 Subject: [PATCH] Adding a metric to count storage errors and their error code. (#5497) --- quickwit/quickwit-storage/src/metrics.rs | 11 ++++++++++- quickwit/quickwit-storage/src/object_storage/error.rs | 7 ++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs index 0e1547a340f..77287de891b 100644 --- a/quickwit/quickwit-storage/src/metrics.rs +++ b/quickwit/quickwit-storage/src/metrics.rs @@ -21,7 +21,8 @@ use once_cell::sync::Lazy; use quickwit_common::metrics::{ - new_counter, new_counter_vec, new_counter_with_labels, new_gauge, IntCounter, IntGauge, + new_counter, new_counter_vec, new_counter_with_labels, new_gauge, IntCounter, IntCounterVec, + IntGauge, }; /// Counters associated to storage operations. @@ -35,6 +36,7 @@ pub struct StorageMetrics { pub get_slice_timeout_successes: [IntCounter; 3], pub get_slice_timeout_all_timeouts: IntCounter, pub object_storage_get_total: IntCounter, + pub object_storage_get_errors_total: IntCounterVec<1>, pub object_storage_put_total: IntCounter, pub object_storage_put_parts: IntCounter, pub object_storage_download_num_bytes: IntCounter, @@ -73,6 +75,13 @@ impl Default for StorageMetrics { "storage", &[], ), + object_storage_get_errors_total: new_counter_vec::<1>( + "object_storage_get_errors_total", + "Number of GetObject errors.", + "storage", + &[], + ["code"], + ), object_storage_put_total: new_counter( "object_storage_puts_total", "Number of objects uploaded. 
May differ from object_storage_requests_parts due to \ diff --git a/quickwit/quickwit-storage/src/object_storage/error.rs b/quickwit/quickwit-storage/src/object_storage/error.rs index e447d0ce573..b1e94608104 100644 --- a/quickwit/quickwit-storage/src/object_storage/error.rs +++ b/quickwit/quickwit-storage/src/object_storage/error.rs @@ -17,7 +17,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. -use aws_sdk_s3::error::{DisplayErrorContext, SdkError}; +use aws_sdk_s3::error::{DisplayErrorContext, ProvideErrorMetadata, SdkError}; use aws_sdk_s3::operation::abort_multipart_upload::AbortMultipartUploadError; use aws_sdk_s3::operation::complete_multipart_upload::CompleteMultipartUploadError; use aws_sdk_s3::operation::create_multipart_upload::CreateMultipartUploadError; @@ -67,6 +67,11 @@ pub trait ToStorageErrorKind { impl ToStorageErrorKind for GetObjectError { fn to_storage_error_kind(&self) -> StorageErrorKind { + let error_code = self.code().unwrap_or("unknown"); + crate::STORAGE_METRICS + .object_storage_get_errors_total + .with_label_values([error_code]) + .inc(); match self { GetObjectError::InvalidObjectState(_) => StorageErrorKind::Service, GetObjectError::NoSuchKey(_) => StorageErrorKind::NotFound,