retry_policy: don't use wildcard '_' in QueryError match

Since last time, during error refactor I introduced a silent bug to the code (scylladb#1075), I'd like to prevent that from happening in the future. This is why we replace a `_` match with explicit error variants in retry policy modules. We also enable `wildcard_enum_match_arm` clippy lint in this place for QueryError and DbError matches.
muzarski · Oct 8, 2024 · 8f82827 · 8f82827
1 parent 7af18ee
commit 8f82827
Show file tree

Hide file tree

Showing 2 changed files with 200 additions and 119 deletions.
diff --git a/scylla/src/transport/downgrading_consistency_retry_policy.rs b/scylla/src/transport/downgrading_consistency_retry_policy.rs
@@ -91,87 +91,128 @@ impl RetrySession for DowngradingConsistencyRetrySession {
             decision
         }
 
+        // Do not remove this lint!
+        // It's there for a reason - we don't want new variants
+        // automatically fall under `_` pattern when they are introduced.
+        #[deny(clippy::wildcard_enum_match_arm)]
         match query_info.error {
             // Basic errors - there are some problems on this node
             // Retry on a different one if possible
-            QueryError::BrokenConnection(_)
-            | QueryError::ConnectionPoolError(_)
-            | QueryError::DbError(DbError::Overloaded, _)
-            | QueryError::DbError(DbError::ServerError, _)
-            | QueryError::DbError(DbError::TruncateError, _) => {
+            QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => {
                 if query_info.is_idempotent {
                     RetryDecision::RetryNextNode(None)
                 } else {
                     RetryDecision::DontRetry
                 }
             }
-            // Unavailable - the current node believes that not enough nodes
-            // are alive to satisfy specified consistency requirements.
-            QueryError::DbError(DbError::Unavailable { alive, .. }, _) => {
-                if !self.was_retry {
-                    self.was_retry = true;
-                    max_likely_to_work_cl(*alive, cl)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // ReadTimeout - coordinator didn't receive enough replies in time.
-            QueryError::DbError(
-                DbError::ReadTimeout {
-                    received,
-                    required,
-                    data_present,
-                    ..
-                },
-                _,
-            ) => {
-                if self.was_retry {
-                    RetryDecision::DontRetry
-                } else if received < required {
-                    self.was_retry = true;
-                    max_likely_to_work_cl(*received, cl)
-                } else if !*data_present {
-                    self.was_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // Write timeout - coordinator didn't receive enough replies in time.
-            QueryError::DbError(
-                DbError::WriteTimeout {
-                    write_type,
-                    received,
-                    ..
-                },
-                _,
-            ) => {
-                if self.was_retry || !query_info.is_idempotent {
-                    RetryDecision::DontRetry
-                } else {
-                    self.was_retry = true;
-                    match write_type {
-                        WriteType::Batch | WriteType::Simple if *received > 0 => {
-                            RetryDecision::IgnoreWriteError
+            // DbErrors
+            QueryError::DbError(db_error, _) => {
+                // Do not remove this lint!
+                // It's there for a reason - we don't want new variants
+                // automatically fall under `_` pattern when they are introduced.
+                #[deny(clippy::wildcard_enum_match_arm)]
+                match db_error {
+                    // Basic errors - there are some problems on this node
+                    // Retry on a different one if possible
+                    DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
+                        if query_info.is_idempotent {
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
                         }
-
-                        WriteType::UnloggedBatch => {
-                            // Since only part of the batch could have been persisted,
-                            // retry with whatever consistency should allow to persist all
+                    }
+                    // Unavailable - the current node believes that not enough nodes
+                    // are alive to satisfy specified consistency requirements.
+                    DbError::Unavailable { alive, .. } => {
+                        if !self.was_retry {
+                            self.was_retry = true;
+                            max_likely_to_work_cl(*alive, cl)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // ReadTimeout - coordinator didn't receive enough replies in time.
+                    DbError::ReadTimeout {
+                        received,
+                        required,
+                        data_present,
+                        ..
+                    } => {
+                        if self.was_retry {
+                            RetryDecision::DontRetry
+                        } else if received < required {
+                            self.was_retry = true;
                             max_likely_to_work_cl(*received, cl)
+                        } else if !*data_present {
+                            self.was_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Write timeout - coordinator didn't receive enough replies in time.
+                    DbError::WriteTimeout {
+                        write_type,
+                        received,
+                        ..
+                    } => {
+                        if self.was_retry || !query_info.is_idempotent {
+                            RetryDecision::DontRetry
+                        } else {
+                            self.was_retry = true;
+                            match write_type {
+                                WriteType::Batch | WriteType::Simple if *received > 0 => {
+                                    RetryDecision::IgnoreWriteError
+                                }
+
+                                WriteType::UnloggedBatch => {
+                                    // Since only part of the batch could have been persisted,
+                                    // retry with whatever consistency should allow to persist all
+                                    max_likely_to_work_cl(*received, cl)
+                                }
+                                WriteType::BatchLog => RetryDecision::RetrySameNode(None),
+
+                                WriteType::Counter
+                                | WriteType::Cas
+                                | WriteType::View
+                                | WriteType::Cdc
+                                | WriteType::Simple
+                                | WriteType::Batch
+                                | WriteType::Other(_) => RetryDecision::DontRetry,
+                            }
                         }
-                        WriteType::BatchLog => RetryDecision::RetrySameNode(None),
-
-                        _ => RetryDecision::DontRetry,
                     }
+                    // The node is still bootstrapping it can't execute the query, we should try another one
+                    DbError::IsBootstrapping => RetryDecision::RetryNextNode(None),
+                    // In all other cases propagate the error to the user
+                    DbError::SyntaxError
+                    | DbError::Invalid
+                    | DbError::AlreadyExists { .. }
+                    | DbError::FunctionFailure { .. }
+                    | DbError::AuthenticationError
+                    | DbError::Unauthorized
+                    | DbError::ConfigError
+                    | DbError::ReadFailure { .. }
+                    | DbError::WriteFailure { .. }
+                    | DbError::Unprepared { .. }
+                    | DbError::ProtocolError
+                    | DbError::RateLimitReached { .. }
+                    | DbError::Other(_) => RetryDecision::DontRetry,
                 }
             }
-            // The node is still bootstrapping it can't execute the query, we should try another one
-            QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None),
             // Connection to the contacted node is overloaded, try another one
             QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None),
             // In all other cases propagate the error to the user
-            _ => RetryDecision::DontRetry,
+            QueryError::BadQuery(_)
+            | QueryError::MetadataError(_)
+            | QueryError::CqlRequestSerialization(_)
+            | QueryError::BodyExtensionsParseError(_)
+            | QueryError::EmptyPlan
+            | QueryError::CqlResultParseError(_)
+            | QueryError::CqlErrorParseError(_)
+            | QueryError::ProtocolError(_)
+            | QueryError::TimeoutError
+            | QueryError::RequestTimeout(_) => RetryDecision::DontRetry,
         }
     }
 

diff --git a/scylla/src/transport/retry_policy.rs b/scylla/src/transport/retry_policy.rs
@@ -139,76 +139,116 @@ impl RetrySession for DefaultRetrySession {
         if query_info.consistency.is_serial() {
             return RetryDecision::DontRetry;
         };
+
+        // Do not remove this lint!
+        // It's there for a reason - we don't want new variants
+        // automatically fall under `_` pattern when they are introduced.
+        #[deny(clippy::wildcard_enum_match_arm)]
         match query_info.error {
             // Basic errors - there are some problems on this node
             // Retry on a different one if possible
-            QueryError::BrokenConnection(_)
-            | QueryError::ConnectionPoolError(_)
-            | QueryError::DbError(DbError::Overloaded, _)
-            | QueryError::DbError(DbError::ServerError, _)
-            | QueryError::DbError(DbError::TruncateError, _) => {
+            QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => {
                 if query_info.is_idempotent {
                     RetryDecision::RetryNextNode(None)
                 } else {
                     RetryDecision::DontRetry
                 }
             }
-            // Unavailable - the current node believes that not enough nodes
-            // are alive to satisfy specified consistency requirements.
-            // Maybe this node has network problems - try a different one.
-            // Perform at most one retry - it's unlikely that two nodes
-            // have network problems at the same time
-            QueryError::DbError(DbError::Unavailable { .. }, _) => {
-                if !self.was_unavailable_retry {
-                    self.was_unavailable_retry = true;
-                    RetryDecision::RetryNextNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // ReadTimeout - coordinator didn't receive enough replies in time.
-            // Retry at most once and only if there were actually enough replies
-            // to satisfy consistency but they were all just checksums (data_present == false).
-            // This happens when the coordinator picked replicas that were overloaded/dying.
-            // Retried request should have some useful response because the node will detect
-            // that these replicas are dead.
-            QueryError::DbError(
-                DbError::ReadTimeout {
-                    received,
-                    required,
-                    data_present,
-                    ..
-                },
-                _,
-            ) => {
-                if !self.was_read_timeout_retry && received >= required && !*data_present {
-                    self.was_read_timeout_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // Write timeout - coordinator didn't receive enough replies in time.
-            // Retry at most once and only for BatchLog write.
-            // Coordinator probably didn't detect the nodes as dead.
-            // By the time we retry they should be detected as dead.
-            QueryError::DbError(DbError::WriteTimeout { write_type, .. }, _) => {
-                if !self.was_write_timeout_retry
-                    && query_info.is_idempotent
-                    && *write_type == WriteType::BatchLog
-                {
-                    self.was_write_timeout_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
+
+            // DbErrors
+            QueryError::DbError(db_error, _) => {
+                // Do not remove this lint!
+                // It's there for a reason - we don't want new variants
+                // automatically fall under `_` pattern when they are introduced.
+                #[deny(clippy::wildcard_enum_match_arm)]
+                match db_error {
+                    // Basic errors - there are some problems on this node
+                    // Retry on a different one if possible
+                    DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
+                        if query_info.is_idempotent {
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Unavailable - the current node believes that not enough nodes
+                    // are alive to satisfy specified consistency requirements.
+                    // Maybe this node has network problems - try a different one.
+                    // Perform at most one retry - it's unlikely that two nodes
+                    // have network problems at the same time
+                    DbError::Unavailable { .. } => {
+                        if !self.was_unavailable_retry {
+                            self.was_unavailable_retry = true;
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // ReadTimeout - coordinator didn't receive enough replies in time.
+                    // Retry at most once and only if there were actually enough replies
+                    // to satisfy consistency but they were all just checksums (data_present == false).
+                    // This happens when the coordinator picked replicas that were overloaded/dying.
+                    // Retried request should have some useful response because the node will detect
+                    // that these replicas are dead.
+                    DbError::ReadTimeout {
+                        received,
+                        required,
+                        data_present,
+                        ..
+                    } => {
+                        if !self.was_read_timeout_retry && received >= required && !*data_present {
+                            self.was_read_timeout_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Write timeout - coordinator didn't receive enough replies in time.
+                    // Retry at most once and only for BatchLog write.
+                    // Coordinator probably didn't detect the nodes as dead.
+                    // By the time we retry they should be detected as dead.
+                    DbError::WriteTimeout { write_type, .. } => {
+                        if !self.was_write_timeout_retry
+                            && query_info.is_idempotent
+                            && *write_type == WriteType::BatchLog
+                        {
+                            self.was_write_timeout_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // The node is still bootstrapping it can't execute the query, we should try another one
+                    DbError::IsBootstrapping => RetryDecision::RetryNextNode(None),
+                    // In all other cases propagate the error to the user
+                    DbError::SyntaxError
+                    | DbError::Invalid
+                    | DbError::AlreadyExists { .. }
+                    | DbError::FunctionFailure { .. }
+                    | DbError::AuthenticationError
+                    | DbError::Unauthorized
+                    | DbError::ConfigError
+                    | DbError::ReadFailure { .. }
+                    | DbError::WriteFailure { .. }
+                    | DbError::Unprepared { .. }
+                    | DbError::ProtocolError
+                    | DbError::RateLimitReached { .. }
+                    | DbError::Other(_) => RetryDecision::DontRetry,
                 }
             }
-            // The node is still bootstrapping it can't execute the query, we should try another one
-            QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None),
             // Connection to the contacted node is overloaded, try another one
             QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None),
             // In all other cases propagate the error to the user
-            _ => RetryDecision::DontRetry,
+            QueryError::BadQuery(_)
+            | QueryError::MetadataError(_)
+            | QueryError::CqlRequestSerialization(_)
+            | QueryError::BodyExtensionsParseError(_)
+            | QueryError::EmptyPlan
+            | QueryError::CqlResultParseError(_)
+            | QueryError::CqlErrorParseError(_)
+            | QueryError::ProtocolError(_)
+            | QueryError::TimeoutError
+            | QueryError::RequestTimeout(_) => RetryDecision::DontRetry,
         }
     }