From 5785a57f2912ab369dc859cc5263c66c4339bf6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= <mikolaj.uzarski@scylladb.com>
Date: Tue, 8 Oct 2024 15:44:48 +0200
Subject: [PATCH 1/5] lbp: don't use '_' match in reliable_latency_measure

Since last time, during error refactor I introduced a silent
bug to the code (https://github.com/scylladb/scylla-rust-driver/pull/1075),
I'd like to prevent that from happening in the future. This is why
we replace a `_` match with explicit error variants in `reliable_latency_measure` function.

We also enable the `wildcard_enum_match_arm` clippy lint here.
---
 .../src/transport/load_balancing/default.rs   | 50 +++++++++++++++----
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/scylla/src/transport/load_balancing/default.rs b/scylla/src/transport/load_balancing/default.rs
index 51db7f97f5..773b0c19ca 100644
--- a/scylla/src/transport/load_balancing/default.rs
+++ b/scylla/src/transport/load_balancing/default.rs
@@ -2838,29 +2838,61 @@ mod latency_awareness {
         }
 
         pub(crate) fn reliable_latency_measure(error: &QueryError) -> bool {
+            // Do not remove this lint!
+            // It's there for a reason - we don't want new variants
+            // automatically fall under `_` pattern when they are introduced.
+            #[deny(clippy::wildcard_enum_match_arm)]
             match error {
-                // "fast" errors, i.e. ones that are returned quickly after the query begins
+                // "fast" errors, i.e. ones that appeared on driver side, before sending the request
                 QueryError::BadQuery(_)
                 | QueryError::CqlRequestSerialization(_)
                 | QueryError::BrokenConnection(_)
                 | QueryError::ConnectionPoolError(_)
                 | QueryError::EmptyPlan
-                | QueryError::UnableToAllocStreamId
-                | QueryError::DbError(DbError::IsBootstrapping, _)
-                | QueryError::DbError(DbError::Unavailable { .. }, _)
-                | QueryError::DbError(DbError::Unprepared { .. }, _)
-                | QueryError::DbError(DbError::Overloaded { .. }, _)
-                | QueryError::DbError(DbError::RateLimitReached { .. }, _) => false,
+                | QueryError::UnableToAllocStreamId => false,
 
                 // "slow" errors, i.e. ones that are returned after considerable time of query being run
-                QueryError::DbError(_, _)
-                | QueryError::CqlResultParseError(_)
+                QueryError::CqlResultParseError(_)
                 | QueryError::CqlErrorParseError(_)
                 | QueryError::BodyExtensionsParseError(_)
                 | QueryError::MetadataError(_)
                 | QueryError::ProtocolError(_)
                 | QueryError::TimeoutError
                 | QueryError::RequestTimeout(_) => true,
+
+                // handle db errors
+                QueryError::DbError(db_error, _) => {
+                    // Do not remove this lint!
+                    // It's there for a reason - we don't want new variants
+                    // automatically fall under `_` pattern when they are introduced.
+                    #[deny(clippy::wildcard_enum_match_arm)]
+                    match db_error {
+                        // An errors that appeared on server side, but did not require selected node
+                        // to contact other nodes (i.e. fast server side errors).
+                        DbError::IsBootstrapping
+                        | DbError::Unavailable { .. }
+                        | DbError::Unprepared { .. }
+                        | DbError::Overloaded { .. }
+                        | DbError::RateLimitReached { .. } => false,
+
+                        // Rest of server side errors that take considerable amount of time to be returned
+                        DbError::SyntaxError
+                        | DbError::Invalid
+                        | DbError::AlreadyExists { .. }
+                        | DbError::FunctionFailure { .. }
+                        | DbError::AuthenticationError
+                        | DbError::TruncateError
+                        | DbError::ReadTimeout { .. }
+                        | DbError::WriteTimeout { .. }
+                        | DbError::ReadFailure { .. }
+                        | DbError::WriteFailure { .. }
+                        | DbError::ServerError
+                        | DbError::ProtocolError
+                        | DbError::Unauthorized
+                        | DbError::ConfigError
+                        | DbError::Other(_) => true,
+                    }
+                }
             }
         }
     }

From 4ef58f57b7f9a2d1dd9581edf49c8d42154e66c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= <mikolaj.uzarski@scylladb.com>
Date: Fri, 4 Oct 2024 13:16:14 +0200
Subject: [PATCH 2/5] retry_policy: don't use wildcard '_' in QueryError match

Since last time, during error refactor I introduced a silent
bug to the code (https://github.com/scylladb/scylla-rust-driver/pull/1075),
I'd like to prevent that from happening in the future. This is why
we replace a `_` match with explicit error variants in retry policy
modules.

We also enable `wildcard_enum_match_arm` clippy lint in this place
for QueryError and DbError matches.
---
 .../downgrading_consistency_retry_policy.rs   | 167 +++++++++++-------
 scylla/src/transport/retry_policy.rs          | 152 ++++++++++------
 2 files changed, 200 insertions(+), 119 deletions(-)

diff --git a/scylla/src/transport/downgrading_consistency_retry_policy.rs b/scylla/src/transport/downgrading_consistency_retry_policy.rs
index 51fcdfd3ea..84af127d09 100644
--- a/scylla/src/transport/downgrading_consistency_retry_policy.rs
+++ b/scylla/src/transport/downgrading_consistency_retry_policy.rs
@@ -91,87 +91,128 @@ impl RetrySession for DowngradingConsistencyRetrySession {
             decision
         }
 
+        // Do not remove this lint!
+        // It's there for a reason - we don't want new variants
+        // automatically fall under `_` pattern when they are introduced.
+        #[deny(clippy::wildcard_enum_match_arm)]
         match query_info.error {
             // Basic errors - there are some problems on this node
             // Retry on a different one if possible
-            QueryError::BrokenConnection(_)
-            | QueryError::ConnectionPoolError(_)
-            | QueryError::DbError(DbError::Overloaded, _)
-            | QueryError::DbError(DbError::ServerError, _)
-            | QueryError::DbError(DbError::TruncateError, _) => {
+            QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => {
                 if query_info.is_idempotent {
                     RetryDecision::RetryNextNode(None)
                 } else {
                     RetryDecision::DontRetry
                 }
             }
-            // Unavailable - the current node believes that not enough nodes
-            // are alive to satisfy specified consistency requirements.
-            QueryError::DbError(DbError::Unavailable { alive, .. }, _) => {
-                if !self.was_retry {
-                    self.was_retry = true;
-                    max_likely_to_work_cl(*alive, cl)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // ReadTimeout - coordinator didn't receive enough replies in time.
-            QueryError::DbError(
-                DbError::ReadTimeout {
-                    received,
-                    required,
-                    data_present,
-                    ..
-                },
-                _,
-            ) => {
-                if self.was_retry {
-                    RetryDecision::DontRetry
-                } else if received < required {
-                    self.was_retry = true;
-                    max_likely_to_work_cl(*received, cl)
-                } else if !*data_present {
-                    self.was_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // Write timeout - coordinator didn't receive enough replies in time.
-            QueryError::DbError(
-                DbError::WriteTimeout {
-                    write_type,
-                    received,
-                    ..
-                },
-                _,
-            ) => {
-                if self.was_retry || !query_info.is_idempotent {
-                    RetryDecision::DontRetry
-                } else {
-                    self.was_retry = true;
-                    match write_type {
-                        WriteType::Batch | WriteType::Simple if *received > 0 => {
-                            RetryDecision::IgnoreWriteError
+            // DbErrors
+            QueryError::DbError(db_error, _) => {
+                // Do not remove this lint!
+                // It's there for a reason - we don't want new variants
+                // automatically fall under `_` pattern when they are introduced.
+                #[deny(clippy::wildcard_enum_match_arm)]
+                match db_error {
+                    // Basic errors - there are some problems on this node
+                    // Retry on a different one if possible
+                    DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
+                        if query_info.is_idempotent {
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
                         }
-
-                        WriteType::UnloggedBatch => {
-                            // Since only part of the batch could have been persisted,
-                            // retry with whatever consistency should allow to persist all
+                    }
+                    // Unavailable - the current node believes that not enough nodes
+                    // are alive to satisfy specified consistency requirements.
+                    DbError::Unavailable { alive, .. } => {
+                        if !self.was_retry {
+                            self.was_retry = true;
+                            max_likely_to_work_cl(*alive, cl)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // ReadTimeout - coordinator didn't receive enough replies in time.
+                    DbError::ReadTimeout {
+                        received,
+                        required,
+                        data_present,
+                        ..
+                    } => {
+                        if self.was_retry {
+                            RetryDecision::DontRetry
+                        } else if received < required {
+                            self.was_retry = true;
                             max_likely_to_work_cl(*received, cl)
+                        } else if !*data_present {
+                            self.was_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Write timeout - coordinator didn't receive enough replies in time.
+                    DbError::WriteTimeout {
+                        write_type,
+                        received,
+                        ..
+                    } => {
+                        if self.was_retry || !query_info.is_idempotent {
+                            RetryDecision::DontRetry
+                        } else {
+                            self.was_retry = true;
+                            match write_type {
+                                WriteType::Batch | WriteType::Simple if *received > 0 => {
+                                    RetryDecision::IgnoreWriteError
+                                }
+
+                                WriteType::UnloggedBatch => {
+                                    // Since only part of the batch could have been persisted,
+                                    // retry with whatever consistency should allow to persist all
+                                    max_likely_to_work_cl(*received, cl)
+                                }
+                                WriteType::BatchLog => RetryDecision::RetrySameNode(None),
+
+                                WriteType::Counter
+                                | WriteType::Cas
+                                | WriteType::View
+                                | WriteType::Cdc
+                                | WriteType::Simple
+                                | WriteType::Batch
+                                | WriteType::Other(_) => RetryDecision::DontRetry,
+                            }
                         }
-                        WriteType::BatchLog => RetryDecision::RetrySameNode(None),
-
-                        _ => RetryDecision::DontRetry,
                     }
+                    // The node is still bootstrapping it can't execute the query, we should try another one
+                    DbError::IsBootstrapping => RetryDecision::RetryNextNode(None),
+                    // In all other cases propagate the error to the user
+                    DbError::SyntaxError
+                    | DbError::Invalid
+                    | DbError::AlreadyExists { .. }
+                    | DbError::FunctionFailure { .. }
+                    | DbError::AuthenticationError
+                    | DbError::Unauthorized
+                    | DbError::ConfigError
+                    | DbError::ReadFailure { .. }
+                    | DbError::WriteFailure { .. }
+                    | DbError::Unprepared { .. }
+                    | DbError::ProtocolError
+                    | DbError::RateLimitReached { .. }
+                    | DbError::Other(_) => RetryDecision::DontRetry,
                 }
             }
-            // The node is still bootstrapping it can't execute the query, we should try another one
-            QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None),
             // Connection to the contacted node is overloaded, try another one
             QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None),
             // In all other cases propagate the error to the user
-            _ => RetryDecision::DontRetry,
+            QueryError::BadQuery(_)
+            | QueryError::MetadataError(_)
+            | QueryError::CqlRequestSerialization(_)
+            | QueryError::BodyExtensionsParseError(_)
+            | QueryError::EmptyPlan
+            | QueryError::CqlResultParseError(_)
+            | QueryError::CqlErrorParseError(_)
+            | QueryError::ProtocolError(_)
+            | QueryError::TimeoutError
+            | QueryError::RequestTimeout(_) => RetryDecision::DontRetry,
         }
     }
 
diff --git a/scylla/src/transport/retry_policy.rs b/scylla/src/transport/retry_policy.rs
index 44baa44248..9e0e5a1acd 100644
--- a/scylla/src/transport/retry_policy.rs
+++ b/scylla/src/transport/retry_policy.rs
@@ -139,76 +139,116 @@ impl RetrySession for DefaultRetrySession {
         if query_info.consistency.is_serial() {
             return RetryDecision::DontRetry;
         };
+
+        // Do not remove this lint!
+        // It's there for a reason - we don't want new variants
+        // automatically fall under `_` pattern when they are introduced.
+        #[deny(clippy::wildcard_enum_match_arm)]
         match query_info.error {
             // Basic errors - there are some problems on this node
             // Retry on a different one if possible
-            QueryError::BrokenConnection(_)
-            | QueryError::ConnectionPoolError(_)
-            | QueryError::DbError(DbError::Overloaded, _)
-            | QueryError::DbError(DbError::ServerError, _)
-            | QueryError::DbError(DbError::TruncateError, _) => {
+            QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => {
                 if query_info.is_idempotent {
                     RetryDecision::RetryNextNode(None)
                 } else {
                     RetryDecision::DontRetry
                 }
             }
-            // Unavailable - the current node believes that not enough nodes
-            // are alive to satisfy specified consistency requirements.
-            // Maybe this node has network problems - try a different one.
-            // Perform at most one retry - it's unlikely that two nodes
-            // have network problems at the same time
-            QueryError::DbError(DbError::Unavailable { .. }, _) => {
-                if !self.was_unavailable_retry {
-                    self.was_unavailable_retry = true;
-                    RetryDecision::RetryNextNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // ReadTimeout - coordinator didn't receive enough replies in time.
-            // Retry at most once and only if there were actually enough replies
-            // to satisfy consistency but they were all just checksums (data_present == false).
-            // This happens when the coordinator picked replicas that were overloaded/dying.
-            // Retried request should have some useful response because the node will detect
-            // that these replicas are dead.
-            QueryError::DbError(
-                DbError::ReadTimeout {
-                    received,
-                    required,
-                    data_present,
-                    ..
-                },
-                _,
-            ) => {
-                if !self.was_read_timeout_retry && received >= required && !*data_present {
-                    self.was_read_timeout_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
-                }
-            }
-            // Write timeout - coordinator didn't receive enough replies in time.
-            // Retry at most once and only for BatchLog write.
-            // Coordinator probably didn't detect the nodes as dead.
-            // By the time we retry they should be detected as dead.
-            QueryError::DbError(DbError::WriteTimeout { write_type, .. }, _) => {
-                if !self.was_write_timeout_retry
-                    && query_info.is_idempotent
-                    && *write_type == WriteType::BatchLog
-                {
-                    self.was_write_timeout_retry = true;
-                    RetryDecision::RetrySameNode(None)
-                } else {
-                    RetryDecision::DontRetry
+
+            // DbErrors
+            QueryError::DbError(db_error, _) => {
+                // Do not remove this lint!
+                // It's there for a reason - we don't want new variants
+                // automatically fall under `_` pattern when they are introduced.
+                #[deny(clippy::wildcard_enum_match_arm)]
+                match db_error {
+                    // Basic errors - there are some problems on this node
+                    // Retry on a different one if possible
+                    DbError::Overloaded | DbError::ServerError | DbError::TruncateError => {
+                        if query_info.is_idempotent {
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Unavailable - the current node believes that not enough nodes
+                    // are alive to satisfy specified consistency requirements.
+                    // Maybe this node has network problems - try a different one.
+                    // Perform at most one retry - it's unlikely that two nodes
+                    // have network problems at the same time
+                    DbError::Unavailable { .. } => {
+                        if !self.was_unavailable_retry {
+                            self.was_unavailable_retry = true;
+                            RetryDecision::RetryNextNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // ReadTimeout - coordinator didn't receive enough replies in time.
+                    // Retry at most once and only if there were actually enough replies
+                    // to satisfy consistency but they were all just checksums (data_present == false).
+                    // This happens when the coordinator picked replicas that were overloaded/dying.
+                    // Retried request should have some useful response because the node will detect
+                    // that these replicas are dead.
+                    DbError::ReadTimeout {
+                        received,
+                        required,
+                        data_present,
+                        ..
+                    } => {
+                        if !self.was_read_timeout_retry && received >= required && !*data_present {
+                            self.was_read_timeout_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // Write timeout - coordinator didn't receive enough replies in time.
+                    // Retry at most once and only for BatchLog write.
+                    // Coordinator probably didn't detect the nodes as dead.
+                    // By the time we retry they should be detected as dead.
+                    DbError::WriteTimeout { write_type, .. } => {
+                        if !self.was_write_timeout_retry
+                            && query_info.is_idempotent
+                            && *write_type == WriteType::BatchLog
+                        {
+                            self.was_write_timeout_retry = true;
+                            RetryDecision::RetrySameNode(None)
+                        } else {
+                            RetryDecision::DontRetry
+                        }
+                    }
+                    // The node is still bootstrapping it can't execute the query, we should try another one
+                    DbError::IsBootstrapping => RetryDecision::RetryNextNode(None),
+                    // In all other cases propagate the error to the user
+                    DbError::SyntaxError
+                    | DbError::Invalid
+                    | DbError::AlreadyExists { .. }
+                    | DbError::FunctionFailure { .. }
+                    | DbError::AuthenticationError
+                    | DbError::Unauthorized
+                    | DbError::ConfigError
+                    | DbError::ReadFailure { .. }
+                    | DbError::WriteFailure { .. }
+                    | DbError::Unprepared { .. }
+                    | DbError::ProtocolError
+                    | DbError::RateLimitReached { .. }
+                    | DbError::Other(_) => RetryDecision::DontRetry,
                 }
             }
-            // The node is still bootstrapping it can't execute the query, we should try another one
-            QueryError::DbError(DbError::IsBootstrapping, _) => RetryDecision::RetryNextNode(None),
             // Connection to the contacted node is overloaded, try another one
             QueryError::UnableToAllocStreamId => RetryDecision::RetryNextNode(None),
             // In all other cases propagate the error to the user
-            _ => RetryDecision::DontRetry,
+            QueryError::BadQuery(_)
+            | QueryError::MetadataError(_)
+            | QueryError::CqlRequestSerialization(_)
+            | QueryError::BodyExtensionsParseError(_)
+            | QueryError::EmptyPlan
+            | QueryError::CqlResultParseError(_)
+            | QueryError::CqlErrorParseError(_)
+            | QueryError::ProtocolError(_)
+            | QueryError::TimeoutError
+            | QueryError::RequestTimeout(_) => RetryDecision::DontRetry,
         }
     }
 

From a360f7af8469fae1074fa6feb0adb6cd70f77540 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= <mikolaj.uzarski@scylladb.com>
Date: Fri, 4 Oct 2024 13:29:53 +0200
Subject: [PATCH 3/5] use_keyspace: don't use wildcard '_' in QueryError match

Since last time, during error refactor I introduced a silent
bug to the code (https://github.com/scylladb/scylla-rust-driver/pull/1075),
I'd like to prevent that from happening in the future. This is why
we replace a `_` match with explicit error variants when deciding
if error received after `USE KEYSPACE` should be ignored.

We also enable the `wildcard_enum_match_arm` clippy lint to disallow
using `_` matches.
---
 scylla/src/transport/cluster.rs |  9 +++++----
 scylla/src/transport/errors.rs  | 27 +++++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 4 deletions(-)

diff --git a/scylla/src/transport/cluster.rs b/scylla/src/transport/cluster.rs
index 2313402bc1..9bf9bc52ff 100644
--- a/scylla/src/transport/cluster.rs
+++ b/scylla/src/transport/cluster.rs
@@ -784,12 +784,13 @@ pub(crate) fn use_keyspace_result(
     for result in use_keyspace_results {
         match result {
             Ok(()) => was_ok = true,
-            Err(err) => match err {
-                QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => {
+            Err(err) => {
+                if err.is_connection_broken() {
                     broken_conn_error = Some(err)
+                } else {
+                    return Err(err);
                 }
-                _ => return Err(err),
-            },
+            }
         }
     }
 
diff --git a/scylla/src/transport/errors.rs b/scylla/src/transport/errors.rs
index 8ef4a99437..d8d7365780 100644
--- a/scylla/src/transport/errors.rs
+++ b/scylla/src/transport/errors.rs
@@ -100,6 +100,33 @@ pub enum QueryError {
     RequestTimeout(String),
 }
 
+impl QueryError {
+    pub(crate) fn is_connection_broken(&self) -> bool {
+        // Do not remove this lint!
+        // It's there for a reason - we don't want new variants
+        // automatically fall under `_` pattern when they are introduced.
+        #[deny(clippy::wildcard_enum_match_arm)]
+        match self {
+            // Error variants that imply that some connection error appeared before/during execution.
+            QueryError::BrokenConnection(_) | QueryError::ConnectionPoolError(_) => true,
+
+            // Other errors.
+            QueryError::DbError(_, _)
+            | QueryError::BadQuery(_)
+            | QueryError::CqlRequestSerialization(_)
+            | QueryError::BodyExtensionsParseError(_)
+            | QueryError::EmptyPlan
+            | QueryError::CqlResultParseError(_)
+            | QueryError::CqlErrorParseError(_)
+            | QueryError::MetadataError(_)
+            | QueryError::ProtocolError(_)
+            | QueryError::TimeoutError
+            | QueryError::UnableToAllocStreamId
+            | QueryError::RequestTimeout(_) => false,
+        }
+    }
+}
+
 impl From<SerializeValuesError> for QueryError {
     fn from(serialized_err: SerializeValuesError) -> QueryError {
         QueryError::BadQuery(BadQuery::SerializeValuesError(serialized_err))

From 8dddca1c34f19516e2bad94f69967a36deba711d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= <mikolaj.uzarski@scylladb.com>
Date: Fri, 4 Oct 2024 13:46:48 +0200
Subject: [PATCH 4/5] spec_exec: don't use wildcard '_' in QueryError match

Since last time, during error refactor I introduced a silent
bug to the code (https://github.com/scylladb/scylla-rust-driver/pull/1075),
I'd like to prevent that from happening in the future. This is why
we replace a `_` match with explicit error variants when deciding
if error received from speculative execution should be ignored.

We also enabled the `wildcard_enum_match_arm` clippy lint.
---
 scylla/src/transport/speculative_execution.rs | 27 ++++++++++++++++---
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/scylla/src/transport/speculative_execution.rs b/scylla/src/transport/speculative_execution.rs
index 10b60c4b75..172693f624 100644
--- a/scylla/src/transport/speculative_execution.rs
+++ b/scylla/src/transport/speculative_execution.rs
@@ -85,10 +85,29 @@ impl SpeculativeExecutionPolicy for PercentileSpeculativeExecutionPolicy {
 fn can_be_ignored<ResT>(result: &Result<ResT, QueryError>) -> bool {
     match result {
         Ok(_) => false,
-        Err(QueryError::BrokenConnection(_)) => true,
-        Err(QueryError::ConnectionPoolError(_)) => true,
-        Err(QueryError::TimeoutError) => true,
-        _ => false,
+        Err(e) => {
+            // Do not remove this lint!
+            // It's there for a reason - we don't want new variants
+            // automatically fall under `_` pattern when they are introduced.
+            #[deny(clippy::wildcard_enum_match_arm)]
+            match e {
+                QueryError::BrokenConnection(_)
+                | QueryError::ConnectionPoolError(_)
+                | QueryError::TimeoutError => true,
+
+                QueryError::DbError(_, _)
+                | QueryError::BadQuery(_)
+                | QueryError::CqlRequestSerialization(_)
+                | QueryError::BodyExtensionsParseError(_)
+                | QueryError::EmptyPlan
+                | QueryError::CqlResultParseError(_)
+                | QueryError::CqlErrorParseError(_)
+                | QueryError::MetadataError(_)
+                | QueryError::ProtocolError(_)
+                | QueryError::UnableToAllocStreamId
+                | QueryError::RequestTimeout(_) => false,
+            }
+        }
     }
 }
 

From c400b5fc19d263ab747cfb340968634de05cff12 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Miko=C5=82aj=20Uzarski?= <mikolaj.uzarski@scylladb.com>
Date: Tue, 8 Oct 2024 16:28:38 +0200
Subject: [PATCH 5/5] speculative_execution: fix can_be_ignored implementation

Previously, `can_be_ignored` function would return `true` for some
weird error variants.

I adjusted the implementation, and justified the decision for each
error variant in the comments.
---
 scylla/src/transport/speculative_execution.rs | 75 ++++++++++++++++---
 1 file changed, 64 insertions(+), 11 deletions(-)

diff --git a/scylla/src/transport/speculative_execution.rs b/scylla/src/transport/speculative_execution.rs
index 172693f624..ffcb0b2081 100644
--- a/scylla/src/transport/speculative_execution.rs
+++ b/scylla/src/transport/speculative_execution.rs
@@ -2,6 +2,7 @@ use futures::{
     future::FutureExt,
     stream::{FuturesUnordered, StreamExt},
 };
+use scylla_cql::frame::response::error::DbError;
 use std::{future::Future, sync::Arc, time::Duration};
 use tracing::{trace_span, warn, Instrument};
 
@@ -81,7 +82,10 @@ impl SpeculativeExecutionPolicy for PercentileSpeculativeExecutionPolicy {
     }
 }
 
-// checks if a result created in a speculative execution branch can be ignored
+/// Checks if a result created in a speculative execution branch can be ignored.
+///
+/// We should ignore errors such that their presence when executing the request
+/// on one node, does not imply that the same error will appear during retry on some other node.
 fn can_be_ignored<ResT>(result: &Result<ResT, QueryError>) -> bool {
     match result {
         Ok(_) => false,
@@ -91,21 +95,70 @@ fn can_be_ignored<ResT>(result: &Result<ResT, QueryError>) -> bool {
             // automatically fall under `_` pattern when they are introduced.
             #[deny(clippy::wildcard_enum_match_arm)]
             match e {
-                QueryError::BrokenConnection(_)
-                | QueryError::ConnectionPoolError(_)
-                | QueryError::TimeoutError => true,
-
-                QueryError::DbError(_, _)
-                | QueryError::BadQuery(_)
+                // Errors that will almost certainly appear for other nodes as well
+                QueryError::BadQuery(_)
                 | QueryError::CqlRequestSerialization(_)
                 | QueryError::BodyExtensionsParseError(_)
-                | QueryError::EmptyPlan
                 | QueryError::CqlResultParseError(_)
                 | QueryError::CqlErrorParseError(_)
-                | QueryError::MetadataError(_)
-                | QueryError::ProtocolError(_)
+                | QueryError::ProtocolError(_) => false,
+
+                // EmptyPlan is not returned by `Session::execute_query`.
+                // It is represented by None, which is then transformed
+                // to QueryError::EmptyPlan by the caller
+                // (either here is speculative_execution module, or for non-speculative execution).
+                // I believe this should not be ignored, since we do not expect it here.
+                QueryError::EmptyPlan => false,
+
+                // Errors that should not appear here, thus should not be ignored
+                QueryError::TimeoutError
+                | QueryError::RequestTimeout(_)
+                | QueryError::MetadataError(_) => false,
+
+                // Errors that can be ignored
+                QueryError::BrokenConnection(_)
                 | QueryError::UnableToAllocStreamId
-                | QueryError::RequestTimeout(_) => false,
+                | QueryError::ConnectionPoolError(_) => true,
+
+                // Handle DbErrors
+                QueryError::DbError(db_error, _) => {
+                    // Do not remove this lint!
+                    // It's there for a reason - we don't want new variants
+                    // automatically fall under `_` pattern when they are introduced.
+                    #[deny(clippy::wildcard_enum_match_arm)]
+                    match db_error {
+                        // Errors that will almost certainly appear on other nodes as well
+                        DbError::SyntaxError
+                        | DbError::Invalid
+                        | DbError::AlreadyExists { .. }
+                        | DbError::Unauthorized
+                        | DbError::ProtocolError => false,
+
+                        // Errors that should not appear there - thus, should not be ignored.
+                        DbError::AuthenticationError | DbError::Other(_) => false,
+
+                        // For now, let's assume that UDF failure is not transient - don't ignore it
+                        // TODO: investigate
+                        DbError::FunctionFailure { .. } => false,
+
+                        // Not sure when these can appear - don't ignore them
+                        // TODO: Investigate these errors
+                        DbError::ConfigError | DbError::TruncateError => false,
+
+                        // Errors that we can ignore and perform a retry on some other node
+                        DbError::Unavailable { .. }
+                        | DbError::Overloaded
+                        | DbError::IsBootstrapping
+                        | DbError::ReadTimeout { .. }
+                        | DbError::WriteTimeout { .. }
+                        | DbError::ReadFailure { .. }
+                        | DbError::WriteFailure { .. }
+                        // Preparation may succeed on some other node.
+                        | DbError::Unprepared { .. }
+                        | DbError::ServerError
+                        | DbError::RateLimitReached { .. } => true,
+                    }
+                }
             }
         }
     }