From 8f828270c560b28238b4a533461249e32449563a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= Date: Fri, 4 Oct 2024 13:16:14 +0200 Subject: [PATCH] retry_policy: don't use wildcard '_' in QueryError match Since last time, during error refactor I introduced a silent bug to the code (https://github.com/scylladb/scylla-rust-driver/pull/1075), I'd like to prevent that from happening in the future. This is why we replace a `_` match with explicit error variants in retry policy modules. We also enable `wildcard_enum_match_arm` clippy lint in this place for QueryError and DbError matches. --- .../downgrading_consistency_retry_policy.rs | 167 +++++++++++------- scylla/src/transport/retry_policy.rs | 152 ++++++++++------ 2 files changed, 200 insertions(+), 119 deletions(-) diff --git a/scylla/src/transport/downgrading_consistency_retry_policy.rs b/scylla/src/transport/downgrading_consistency_retry_policy.rs index 51fcdfd3e..84af127d0 100644 --- a/scylla/src/transport/downgrading_consistency_retry_policy.rs +++ b/scylla/src/transport/downgrading_consistency_retry_policy.rs @@ -91,87 +91,128 @@ impl RetrySession for DowngradingConsistencyRetrySession { decision } + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] match query_info.error { // Basic errors - there are some problems on this node // Retry on a different one if possible - QueryError::BrokenConnection(_) - | QueryError::ConnectionPoolError(_) - | QueryError::DbError(DbError::Overloaded, _) - | QueryError::DbError(DbError::ServerError, _) - | QueryError::DbError(DbError::TruncateError, _) => { + QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => { if query_info.is_idempotent { RetryDecision::RetryNextNode(None) } else { RetryDecision::DontRetry } } - // Unavailable - the current node believes that not enough nodes - // are alive to satisfy specified consistency requirements. - QueryError::DbError(DbError::Unavailable { alive, .. }, _) => { - if !self.was_retry { - self.was_retry = true; - max_likely_to_work_cl(*alive, cl) - } else { - RetryDecision::DontRetry - } - } - // ReadTimeout - coordinator didn't receive enough replies in time. - QueryError::DbError( - DbError::ReadTimeout { - received, - required, - data_present, - .. - }, - _, - ) => { - if self.was_retry { - RetryDecision::DontRetry - } else if received < required { - self.was_retry = true; - max_likely_to_work_cl(*received, cl) - } else if !*data_present { - self.was_retry = true; - RetryDecision::RetrySameNode(None) - } else { - RetryDecision::DontRetry - } - } - // Write timeout - coordinator didn't receive enough replies in time. - QueryError::DbError( - DbError::WriteTimeout { - write_type, - received, - .. - }, - _, - ) => { - if self.was_retry || !query_info.is_idempotent { - RetryDecision::DontRetry - } else { - self.was_retry = true; - match write_type { - WriteType::Batch | WriteType::Simple if *received > 0 => { - RetryDecision::IgnoreWriteError + // DbErrors + QueryError::DbError(db_error, _) => { + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] + match db_error { + // Basic errors - there are some problems on this node + // Retry on a different one if possible + DbError::Overloaded | DbError::ServerError | DbError::TruncateError => { + if query_info.is_idempotent { + RetryDecision::RetryNextNode(None) + } else { + RetryDecision::DontRetry } - - WriteType::UnloggedBatch => { - // Since only part of the batch could have been persisted, - // retry with whatever consistency should allow to persist all + } + // Unavailable - the current node believes that not enough nodes + // are alive to satisfy specified consistency requirements. + DbError::Unavailable { alive, .. } => { + if !self.was_retry { + self.was_retry = true; + max_likely_to_work_cl(*alive, cl) + } else { + RetryDecision::DontRetry + } + } + // ReadTimeout - coordinator didn't receive enough replies in time. + DbError::ReadTimeout { + received, + required, + data_present, + .. + } => { + if self.was_retry { + RetryDecision::DontRetry + } else if received < required { + self.was_retry = true; max_likely_to_work_cl(*received, cl) + } else if !*data_present { + self.was_retry = true; + RetryDecision::RetrySameNode(None) + } else { + RetryDecision::DontRetry + } + } + // Write timeout - coordinator didn't receive enough replies in time. + DbError::WriteTimeout { + write_type, + received, + .. + } => { + if self.was_retry || !query_info.is_idempotent { + RetryDecision::DontRetry + } else { + self.was_retry = true; + match write_type { + WriteType::Batch | WriteType::Simple if *received > 0 => { + RetryDecision::IgnoreWriteError + } + + WriteType::UnloggedBatch => { + // Since only part of the batch could have been persisted, + // retry with whatever consistency should allow to persist all + max_likely_to_work_cl(*received, cl) + } + WriteType::BatchLog => RetryDecision::RetrySameNode(None), + + WriteType::Counter + | WriteType::Cas + | WriteType::View + | WriteType::Cdc + | WriteType::Simple + | WriteType::Batch + | WriteType::Other(_) => RetryDecision::DontRetry, + } } - WriteType::BatchLog => RetryDecision::RetrySameNode(None), - - _ => RetryDecision::DontRetry, } + // The node is still bootstrapping it can't execute the query, we should try another one + DbError::IsBootstrapping => RetryDecision::RetryNextNode(None), + // In all other cases propagate the error to the user + DbError::SyntaxError + | DbError::Invalid + | DbError::AlreadyExists { .. } + | DbError::FunctionFailure { .. } + | DbError::AuthenticationError + | DbError::Unauthorized + | DbError::ConfigError + | DbError::ReadFailure { .. } + | DbError::WriteFailure { .. } + | DbError::Unprepared { .. } + | DbError::ProtocolError + | DbError::RateLimitReached { .. } + | DbError::Other(_) => RetryDecision::DontRetry, } } - // The node is still bootstrapping it can't execute the query, we should try another one - QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None), // Connection to the contacted node is overloaded, try another one QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None), // In all other cases propagate the error to the user - _ => RetryDecision::DontRetry, + QueryError::BadQuery(_) + | QueryError::MetadataError(_) + | QueryError::CqlRequestSerialization(_) + | QueryError::BodyExtensionsParseError(_) + | QueryError::EmptyPlan + | QueryError::CqlResultParseError(_) + | QueryError::CqlErrorParseError(_) + | QueryError::ProtocolError(_) + | QueryError::TimeoutError + | QueryError::RequestTimeout(_) => RetryDecision::DontRetry, } } diff --git a/scylla/src/transport/retry_policy.rs b/scylla/src/transport/retry_policy.rs index 44baa4424..9e0e5a1ac 100644 --- a/scylla/src/transport/retry_policy.rs +++ b/scylla/src/transport/retry_policy.rs @@ -139,76 +139,116 @@ impl RetrySession for DefaultRetrySession { if query_info.consistency.is_serial() { return RetryDecision::DontRetry; }; + + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] match query_info.error { // Basic errors - there are some problems on this node // Retry on a different one if possible - QueryError::BrokenConnection(_) - | QueryError::ConnectionPoolError(_) - | QueryError::DbError(DbError::Overloaded, _) - | QueryError::DbError(DbError::ServerError, _) - | QueryError::DbError(DbError::TruncateError, _) => { + QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => { if query_info.is_idempotent { RetryDecision::RetryNextNode(None) } else { RetryDecision::DontRetry } } - // Unavailable - the current node believes that not enough nodes - // are alive to satisfy specified consistency requirements. - // Maybe this node has network problems - try a different one. - // Perform at most one retry - it's unlikely that two nodes - // have network problems at the same time - QueryError::DbError(DbError::Unavailable { .. }, _) => { - if !self.was_unavailable_retry { - self.was_unavailable_retry = true; - RetryDecision::RetryNextNode(None) - } else { - RetryDecision::DontRetry - } - } - // ReadTimeout - coordinator didn't receive enough replies in time. - // Retry at most once and only if there were actually enough replies - // to satisfy consistency but they were all just checksums (data_present == false). - // This happens when the coordinator picked replicas that were overloaded/dying. - // Retried request should have some useful response because the node will detect - // that these replicas are dead. - QueryError::DbError( - DbError::ReadTimeout { - received, - required, - data_present, - .. - }, - _, - ) => { - if !self.was_read_timeout_retry && received >= required && !*data_present { - self.was_read_timeout_retry = true; - RetryDecision::RetrySameNode(None) - } else { - RetryDecision::DontRetry - } - } - // Write timeout - coordinator didn't receive enough replies in time. - // Retry at most once and only for BatchLog write. - // Coordinator probably didn't detect the nodes as dead. - // By the time we retry they should be detected as dead. - QueryError::DbError(DbError::WriteTimeout { write_type, .. }, _) => { - if !self.was_write_timeout_retry - && query_info.is_idempotent - && *write_type == WriteType::BatchLog - { - self.was_write_timeout_retry = true; - RetryDecision::RetrySameNode(None) - } else { - RetryDecision::DontRetry + + // DbErrors + QueryError::DbError(db_error, _) => { + // Do not remove this lint! + // It's there for a reason - we don't want new variants + // automatically fall under `_` pattern when they are introduced. + #[deny(clippy::wildcard_enum_match_arm)] + match db_error { + // Basic errors - there are some problems on this node + // Retry on a different one if possible + DbError::Overloaded | DbError::ServerError | DbError::TruncateError => { + if query_info.is_idempotent { + RetryDecision::RetryNextNode(None) + } else { + RetryDecision::DontRetry + } + } + // Unavailable - the current node believes that not enough nodes + // are alive to satisfy specified consistency requirements. + // Maybe this node has network problems - try a different one. + // Perform at most one retry - it's unlikely that two nodes + // have network problems at the same time + DbError::Unavailable { .. } => { + if !self.was_unavailable_retry { + self.was_unavailable_retry = true; + RetryDecision::RetryNextNode(None) + } else { + RetryDecision::DontRetry + } + } + // ReadTimeout - coordinator didn't receive enough replies in time. + // Retry at most once and only if there were actually enough replies + // to satisfy consistency but they were all just checksums (data_present == false). + // This happens when the coordinator picked replicas that were overloaded/dying. + // Retried request should have some useful response because the node will detect + // that these replicas are dead. + DbError::ReadTimeout { + received, + required, + data_present, + .. + } => { + if !self.was_read_timeout_retry && received >= required && !*data_present { + self.was_read_timeout_retry = true; + RetryDecision::RetrySameNode(None) + } else { + RetryDecision::DontRetry + } + } + // Write timeout - coordinator didn't receive enough replies in time. + // Retry at most once and only for BatchLog write. + // Coordinator probably didn't detect the nodes as dead. + // By the time we retry they should be detected as dead. + DbError::WriteTimeout { write_type, .. } => { + if !self.was_write_timeout_retry + && query_info.is_idempotent + && *write_type == WriteType::BatchLog + { + self.was_write_timeout_retry = true; + RetryDecision::RetrySameNode(None) + } else { + RetryDecision::DontRetry + } + } + // The node is still bootstrapping it can't execute the query, we should try another one + DbError::IsBootstrapping => RetryDecision::RetryNextNode(None), + // In all other cases propagate the error to the user + DbError::SyntaxError + | DbError::Invalid + | DbError::AlreadyExists { .. } + | DbError::FunctionFailure { .. } + | DbError::AuthenticationError + | DbError::Unauthorized + | DbError::ConfigError + | DbError::ReadFailure { .. } + | DbError::WriteFailure { .. } + | DbError::Unprepared { .. } + | DbError::ProtocolError + | DbError::RateLimitReached { .. } + | DbError::Other(_) => RetryDecision::DontRetry, } } - // The node is still bootstrapping it can't execute the query, we should try another one - QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None), // Connection to the contacted node is overloaded, try another one QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None), // In all other cases propagate the error to the user - _ => RetryDecision::DontRetry, + QueryError::BadQuery(_) + | QueryError::MetadataError(_) + | QueryError::CqlRequestSerialization(_) + | QueryError::BodyExtensionsParseError(_) + | QueryError::EmptyPlan + | QueryError::CqlResultParseError(_) + | QueryError::CqlErrorParseError(_) + | QueryError::ProtocolError(_) + | QueryError::TimeoutError + | QueryError::RequestTimeout(_) => RetryDecision::DontRetry, } }