-
Notifications
You must be signed in to change notification settings - Fork 411
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add random failpoint in critical paths #4876
Changes from 13 commits
ec3633d
26d26f7
43f7693
88d37a7
1ad20aa
5fb687d
9a56e2a
4d817fc
0eed31e
88d54d2
cb2a868
29cc539
1297fdd
c50d821
3d986a2
7537de7
89cba01
34ab976
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,13 @@ | |
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include <Common/Exception.h> | ||
#include <Common/FailPoint.h> | ||
#include <Poco/String.h> | ||
#include <Poco/StringTokenizer.h> | ||
#include <Poco/Util/LayeredConfiguration.h> | ||
#include <common/defines.h> | ||
#include <common/logger_useful.h> | ||
|
||
#include <boost/core/noncopyable.hpp> | ||
#include <condition_variable> | ||
|
@@ -21,7 +27,6 @@ | |
namespace DB | ||
{ | ||
// Out-of-class definition of the static map from a String key to a shared FailPointChannel. | ||
// NOTE(review): keys are presumably fail point names -- confirm against the header. | ||
std::unordered_map<String, std::shared_ptr<FailPointChannel>> FailPointHelper::fail_point_wait_channels; | ||
|
||
#define APPLY_FOR_FAILPOINTS_ONCE(M) \ | ||
M(exception_between_drop_meta_and_data) \ | ||
M(exception_between_alter_data_and_meta) \ | ||
|
@@ -105,13 +110,30 @@ std::unordered_map<String, std::shared_ptr<FailPointChannel>> FailPointHelper::f | |
M(pause_after_copr_streams_acquired) \ | ||
M(pause_before_server_merge_one_delta) | ||
|
||
// X-macro listing the fail points that can be enabled with a trigger rate. | ||
// It applies M(NAME) to every entry; in this file it is used both to define the | ||
// name constants in namespace FailPoints and to match names in enableRandomFailPoint. | ||
#define APPLY_FOR_RANDOM_FAILPOINTS(M) \ | ||
M(random_tunnel_wait_timeout_failpoint) \ | ||
M(random_tunnel_init_rpc_failure_failpoint) \ | ||
M(random_receiver_sync_msg_push_failure_failpoint) \ | ||
M(random_receiver_async_msg_push_failure_failpoint) \ | ||
M(random_limit_check_failpoint) \ | ||
M(random_join_build_failpoint) \ | ||
M(random_join_prob_failpoint) \ | ||
M(random_aggregate_create_state_failpoint) \ | ||
M(random_aggregate_merge_failpoint) \ | ||
M(random_sharedquery_failpoint) \ | ||
M(random_interpreter_failpoint) \ | ||
M(random_task_lifecycle_failpoint) \ | ||
M(random_task_manager_find_task_failure_failpoint) \ | ||
M(random_min_tso_scheduler_failpoint) | ||
|
||
// Defines one `const char NAME[]` per fail point listed by the APPLY_FOR_* macros. | ||
// Each constant is initialised with its own identifier spelled as a string (#NAME), | ||
// so other translation units can refer to it through an `extern` declaration. | ||
namespace FailPoints | ||
{ | ||
#define M(NAME) extern const char(NAME)[] = #NAME ""; | ||
APPLY_FOR_FAILPOINTS_ONCE(M) | ||
APPLY_FOR_FAILPOINTS(M) | ||
APPLY_FOR_FAILPOINTS_ONCE_WITH_CHANNEL(M) | ||
APPLY_FOR_FAILPOINTS_WITH_CHANNEL(M) | ||
APPLY_FOR_RANDOM_FAILPOINTS(M) | ||
#undef M | ||
} // namespace FailPoints | ||
|
||
|
@@ -175,7 +197,7 @@ void FailPointHelper::enableFailPoint(const String & fail_point_name) | |
#undef M | ||
#undef SUB_M | ||
|
||
throw Exception("Cannot find fail point " + fail_point_name, ErrorCodes::FAIL_POINT_ERROR); | ||
throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); | ||
} | ||
|
||
void FailPointHelper::disableFailPoint(const String & fail_point_name) | ||
|
@@ -200,6 +222,41 @@ void FailPointHelper::wait(const String & fail_point_name) | |
ptr->wait(); | ||
} | ||
} | ||
|
||
// Enables the random fail points listed in the `flash.random_fail_points` config entry. | ||
// | ||
// The entry is a comma-separated list of `FailPointName-Rate` pairs, where Rate is a | ||
// number in [0,1]. An absent or empty entry is a no-op. | ||
// | ||
// @param config configuration the `flash.random_fail_points` string is read from. | ||
// @param log    logger passed to RUNTIME_ASSERT and used for the final info message. | ||
// | ||
// A malformed pair, a rate that is not a number, or a rate outside [0,1] fails a | ||
// RUNTIME_ASSERT; a name that is not a random fail point makes enableRandomFailPoint throw. | ||
void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) | ||
{ | ||
String random_fail_point_cfg = config.getString("flash.random_fail_points", ""); | ||
if (random_fail_point_cfg.empty()) | ||
return; | ||
|
||
// Trim tokens so that "A-0.1, B-0.2" is accepted, and skip empty ones (e.g. a trailing comma). | ||
Poco::StringTokenizer string_tokens(random_fail_point_cfg, ",", Poco::StringTokenizer::TOK_TRIM | Poco::StringTokenizer::TOK_IGNORE_EMPTY); | ||
for (const auto & string_token : string_tokens) | ||
{ | ||
Poco::StringTokenizer pair_tokens(string_token, "-", Poco::StringTokenizer::TOK_TRIM); | ||
RUNTIME_ASSERT((pair_tokens.count() == 2), log, "RandomFailPoints config should be FailPointA-RatioA,FailPointB-RatioB,... format"); | ||
// atof() returns 0 for unparsable text, which would silently pass the range check below; | ||
// strtod() reports where parsing stopped, so the whole token can be required to be a number. | ||
const String & rate_str = pair_tokens[1]; | ||
char * parse_end = nullptr; | ||
double rate = strtod(rate_str.c_str(), &parse_end); | ||
RUNTIME_ASSERT((!rate_str.empty() && parse_end == rate_str.c_str() + rate_str.size()), log, "RandomFailPoint trigger rate should be a number, while {}", rate_str); | ||
// NaN fails both comparisons, so it is rejected here as well. | ||
RUNTIME_ASSERT((0 <= rate && rate <= 1.0), log, "RandomFailPoint trigger rate should be in [0,1], while {}", rate); | ||
enableRandomFailPoint(pair_tokens[0], rate); | ||
} | ||
LOG_FMT_INFO(log, "Enable RandomFailPoints: {}", random_fail_point_cfg); | ||
} | ||
|
||
// Enables the random fail point whose name equals `fail_point_name`. | ||
// | ||
// The name is compared against every entry of APPLY_FOR_RANDOM_FAILPOINTS; on the first | ||
// match the fail point is enabled through fiu_enable_random with `rate` as the last | ||
// argument, and the function returns. | ||
// | ||
// @param fail_point_name name of the fail point to enable. | ||
// @param rate            value forwarded to fiu_enable_random (the config loader in this file restricts it to [0,1]). | ||
// @throws Exception with ErrorCodes::FAIL_POINT_ERROR when no listed name matches. | ||
void FailPointHelper::enableRandomFailPoint(const String & fail_point_name, double rate) | ||
{ | ||
// SUB_M expands to one name check per fail point: enable it and return on a match. | ||
#define SUB_M(NAME) \ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice, done. |
||
if (fail_point_name == FailPoints::NAME) \ | ||
{ \ | ||
fiu_enable_random(FailPoints::NAME, 1, nullptr, 0, rate); \ | ||
return; \ | ||
} | ||
|
||
#define M(NAME) SUB_M(NAME) | ||
APPLY_FOR_RANDOM_FAILPOINTS(M) | ||
#undef M | ||
#undef SUB_M | ||
|
||
// Reached only when none of the listed random fail points matched the requested name. | ||
throw Exception(fmt::format("Cannot find fail point {}", fail_point_name), ErrorCodes::FAIL_POINT_ERROR); | ||
} | ||
#else | ||
class FailPointChannel | ||
{ | ||
|
@@ -210,6 +267,10 @@ void FailPointHelper::enableFailPoint(const String &) {} | |
// No-op stub used in the #else branch of the surrounding preprocessor conditional. | ||
void FailPointHelper::disableFailPoint(const String &) {} | ||
|
||
// No-op stub used in the #else branch of the surrounding preprocessor conditional. | ||
void FailPointHelper::wait(const String &) {} | ||
|
||
void FailPointHelper::initRandomFailPoints(Poco::Util::LayeredConfiguration & config, Poco::Logger * log) {} | ||
|
||
// No-op stub used in the #else branch of the surrounding preprocessor conditional. | ||
// Parameters are left unnamed, like the other stubs here, to avoid unused-parameter warnings. | ||
void FailPointHelper::enableRandomFailPoint(const String &, double) {} | ||
#endif | ||
|
||
} // namespace DB |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,7 @@ | |
#include <Common/FailPoint.h> | ||
#include <Common/ThreadFactory.h> | ||
#include <Common/TiFlashMetrics.h> | ||
#include <Common/randomSeed.h> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. useless header file? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed |
||
#include <Flash/Mpp/MPPTunnel.h> | ||
#include <Flash/Mpp/Utils.h> | ||
#include <fmt/core.h> | ||
|
@@ -25,6 +26,7 @@ namespace DB | |
// Declarations of the fail point name constants referenced in this file; | ||
// being `extern` without an initialiser, they are defined in another translation unit. | ||
namespace FailPoints | ||
{ | ||
extern const char exception_during_mpp_close_tunnel[]; | ||
extern const char random_tunnel_wait_timeout_failpoint[]; | ||
} // namespace FailPoints | ||
|
||
template <typename Writer> | ||
|
@@ -322,6 +324,7 @@ void MPPTunnelBase<Writer>::waitUntilConnectedOrFinished(std::unique_lock<std::m | |
auto res = cv_for_connected_or_finished.wait_for(lk, timeout, connected_or_finished); | ||
LOG_FMT_TRACE(log, "end waitUntilConnectedOrFinished"); | ||
|
||
fiu_do_on(FailPoints::random_tunnel_wait_timeout_failpoint, res = false;); | ||
if (!res) | ||
throw Exception(tunnel_id + " is timeout"); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there any test plans? How about enabling the failpoints in the CI tests?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We plan to add a regular cluster test, since some failpoints need to be tested under parallel workloads.