Skip to content

Commit

Permalink
RCORE-1386 Track client reset reason in table that detects client res…
Browse files Browse the repository at this point in the history
…et cycles (#7649)

* Broke out the client reset error and action storage from PR #7542
* Removed client reset recovery_allowed flag and other updates from review
* Updated pending_client_reset store to use the schema metadata tables
* Fixed pausing a session does not hold the DB open test
* Moved ownership of reset store to SessionWrapper
* Fixed migration test crash - need to save client reset error in handle fresh realm downloaded
* Updated PendingResetStore to be static functions instead of an initialized object; updates from review
* Make ClientReset::error no longer optional; fixed subscriptions tests
* updated changelog after release
* updates from review
  • Loading branch information
Michael Wilkerson-Barker authored May 31, 2024
1 parent ae3b22a commit 25212bc
Show file tree
Hide file tree
Showing 29 changed files with 981 additions and 436 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Enhancements
* <New feature description> (PR [#????](https://github.com/realm/realm-core/pull/????))
* Report the originating error that caused a client reset to occur. ([#6154](https://github.com/realm/realm-core/issues/6154))

### Fixed
* <How do the end-user experience this issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand All @@ -20,6 +21,7 @@
* Work around a bug in VC++ that resulted in runtime errors when running the tests in a debug build (#[7741](https://github.com/realm/realm-core/issues/7741)).
* Refactor `sync::Session` to eliminate the bind() step of session creation ([#7609](https://github.com/realm/realm-core/pull/7609)).
* Add ScopeExitFail which only calls the handler if exiting the scope via an uncaught exception ([#7609](https://github.com/realm/realm-core/pull/7609)).
* Add the originating error and server requests action that caused a client reset to occur to the client reset tracking metadata storage. ([PR #7649](https://github.com/realm/realm-core/pull/7649))

----------------------------------------------

Expand Down
1 change: 1 addition & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ let notSyncServerSources: [String] = [
"realm/sync/noinst/compact_changesets.cpp",
"realm/sync/noinst/migration_store.cpp",
"realm/sync/noinst/pending_bootstrap_store.cpp",
"realm/sync/noinst/pending_reset_store.cpp",
"realm/sync/noinst/protocol_codec.cpp",
"realm/sync/noinst/sync_metadata_schema.cpp",
"realm/sync/noinst/sync_schema_migration.cpp",
Expand Down
16 changes: 16 additions & 0 deletions src/realm/db.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,22 @@ inline int DB::get_file_format_version() const noexcept
return m_file_format_version;
}

inline std::ostream& operator<<(std::ostream& os, const DB::TransactStage& stage)
{
switch (stage) {
case DB::TransactStage::transact_Ready:
return os << "transact_Ready";
case DB::TransactStage::transact_Reading:
return os << "transact_Reading";
case DB::TransactStage::transact_Frozen:
return os << "transact_Frozen";
case DB::TransactStage::transact_Writing:
return os << "transact_Writing";
}
REALM_UNREACHABLE();
}


} // namespace realm

#endif // REALM_DB_HPP
91 changes: 47 additions & 44 deletions src/realm/object-store/sync/sync_session.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,17 +422,15 @@ void SyncSession::update_error_and_mark_file_for_deletion(SyncError& error, Shou
}
}

void SyncSession::download_fresh_realm(sync::ProtocolErrorInfo::Action server_requests_action)
void SyncSession::download_fresh_realm(const sync::SessionErrorInfo& error_info)
{
// first check that recovery will not be prevented
if (server_requests_action == sync::ProtocolErrorInfo::Action::ClientResetNoRecovery) {
if (error_info.server_requests_action == sync::ProtocolErrorInfo::Action::ClientResetNoRecovery) {
auto mode = config(&SyncConfig::client_resync_mode);
if (mode == ClientResyncMode::Recover) {
handle_fresh_realm_downloaded(
nullptr,
{ErrorCodes::RuntimeError,
"A client reset is required but the server does not permit recovery for this client"},
server_requests_action);
nullptr, {ErrorCodes::RuntimeError,
"A client reset is required but the server does not permit recovery for this client"});
return;
}
}
Expand Down Expand Up @@ -469,14 +467,15 @@ void SyncSession::download_fresh_realm(sync::ProtocolErrorInfo::Action server_re
catch (...) {
// Failed to open the fresh path after attempting to delete it, so we
// just can't do automatic recovery.
handle_fresh_realm_downloaded(nullptr, exception_to_status(), server_requests_action);
handle_fresh_realm_downloaded(nullptr, exception_to_status());
return;
}

util::CheckedLockGuard state_lock(m_state_mutex);
if (m_state != State::Active) {
return;
}

RealmConfig fresh_config;
{
util::CheckedLockGuard config_lock(m_config_mutex);
Expand Down Expand Up @@ -511,7 +510,7 @@ void SyncSession::download_fresh_realm(sync::ProtocolErrorInfo::Action server_re
using SubscriptionState = sync::SubscriptionSet::State;
fresh_sub.get_state_change_notification(SubscriptionState::Complete)
.then([=](SubscriptionState) -> util::Future<sync::SubscriptionSet> {
if (server_requests_action != sync::ProtocolErrorInfo::Action::MigrateToFLX) {
if (error_info.server_requests_action != sync::ProtocolErrorInfo::Action::MigrateToFLX) {
return fresh_sub;
}
if (!self->m_migration_store->is_migration_in_progress()) {
Expand All @@ -527,7 +526,7 @@ void SyncSession::download_fresh_realm(sync::ProtocolErrorInfo::Action server_re
fresh_sync_session->m_migration_store->create_subscriptions(*fresh_sub_store, *query_string);
return fresh_sub_store->get_latest()
.get_state_change_notification(SubscriptionState::Complete)
.then([=](SubscriptionState) {
.then([fresh_sub_store](SubscriptionState) {
return fresh_sub_store->get_latest();
});
})
Expand All @@ -536,29 +535,32 @@ void SyncSession::download_fresh_realm(sync::ProtocolErrorInfo::Action server_re
// it immediately
fresh_sync_session->force_close();
if (subs.is_ok()) {
self->handle_fresh_realm_downloaded(db, Status::OK(), server_requests_action,
std::move(subs.get_value()));
self->handle_fresh_realm_downloaded(db, std::move(error_info), std::move(subs.get_value()));
}
else {
self->handle_fresh_realm_downloaded(nullptr, subs.get_status(), server_requests_action);
self->handle_fresh_realm_downloaded(nullptr, std::move(subs.get_status()));
}
});
}
else { // pbs
fresh_sync_session->wait_for_download_completion([=, weak_self = weak_from_this()](Status s) {
fresh_sync_session->wait_for_download_completion([=, weak_self = weak_from_this()](Status status) {
// Keep the sync session alive while it's downloading, but then close
// it immediately
fresh_sync_session->force_close();
if (auto strong_self = weak_self.lock()) {
strong_self->handle_fresh_realm_downloaded(db, s, server_requests_action);
if (status.is_ok()) {
strong_self->handle_fresh_realm_downloaded(db, std::move(error_info));
}
else {
strong_self->handle_fresh_realm_downloaded(nullptr, std::move(status));
}
}
});
}
fresh_sync_session->revive_if_needed();
}

void SyncSession::handle_fresh_realm_downloaded(DBRef db, Status status,
sync::ProtocolErrorInfo::Action server_requests_action,
void SyncSession::handle_fresh_realm_downloaded(DBRef db, StatusWith<sync::SessionErrorInfo> error_info,
std::optional<sync::SubscriptionSet> new_subs)
{
util::CheckedUniqueLock lock(m_state_mutex);
Expand All @@ -569,15 +571,15 @@ void SyncSession::handle_fresh_realm_downloaded(DBRef db, Status status,
// - unable to write the fresh copy to the file system
// - during download of the fresh copy, the fresh copy itself is reset
// - in FLX mode there was a problem fulfilling the previously active subscription
if (!status.is_ok()) {
if (status == ErrorCodes::OperationAborted) {
if (!error_info.is_ok()) {
if (error_info.get_status() == ErrorCodes::OperationAborted) {
return;
}
lock.unlock();

sync::SessionErrorInfo synthetic(
Status{ErrorCodes::AutoClientResetFailed,
util::format("A fatal error occurred during client reset: '%1'", status.reason())},
util::format("A fatal error occurred during client reset: '%1'", error_info.get_status())},
sync::IsFatal{true});
handle_error(synthetic);
return;
Expand All @@ -593,9 +595,12 @@ void SyncSession::handle_fresh_realm_downloaded(DBRef db, Status status,
// that moving to the inactive state doesn't clear them - they will be
// re-registered when the session becomes active again.
{
m_server_requests_action = server_requests_action;
m_client_reset_fresh_copy = db;
CompletionCallbacks callbacks;
// Save the client reset error for when the original sync session is revived
REALM_ASSERT(error_info.is_ok()); // required if we get here
m_client_reset_error = std::move(error_info.get_value());

std::swap(m_completion_callbacks, callbacks);
// always swap back, even if advance_state throws
auto guard = util::make_scope_exit([&]() noexcept {
Expand All @@ -607,11 +612,13 @@ void SyncSession::handle_fresh_realm_downloaded(DBRef db, Status status,
});
// Do not cancel the notifications on subscriptions.
bool cancel_subscription_notifications = false;
bool is_migration =
m_client_reset_error->server_requests_action == sync::ProtocolErrorInfo::Action::MigrateToFLX ||
m_client_reset_error->server_requests_action == sync::ProtocolErrorInfo::Action::RevertToPBS;
become_inactive(std::move(lock), Status::OK(), cancel_subscription_notifications); // unlocks the lock

// Once the session is inactive, update sync config and subscription store after migration.
if (server_requests_action == sync::ProtocolErrorInfo::Action::MigrateToFLX ||
server_requests_action == sync::ProtocolErrorInfo::Action::RevertToPBS) {
if (is_migration) {
apply_sync_config_after_migration_or_rollback();
auto flx_sync_requested = config(&SyncConfig::flx_sync_requested);
update_subscription_store(flx_sync_requested, std::move(new_subs));
Expand Down Expand Up @@ -695,7 +702,7 @@ void SyncSession::handle_error(sync::SessionErrorInfo error)
case ClientResyncMode::RecoverOrDiscard:
[[fallthrough]];
case ClientResyncMode::Recover:
download_fresh_realm(error.server_requests_action);
download_fresh_realm(error);
return; // do not propagate the error to the user at this point
}
break;
Expand All @@ -707,7 +714,7 @@ void SyncSession::handle_error(sync::SessionErrorInfo error)
m_migration_store->migrate_to_flx(*error.migration_query_string,
m_original_sync_config->partition_value);
save_sync_config_after_migration_or_rollback();
download_fresh_realm(error.server_requests_action);
download_fresh_realm(error);
return;
case sync::ProtocolErrorInfo::Action::RevertToPBS:
// If the client was updated to use FLX natively, but the server was rolled back to PBS,
Expand All @@ -721,7 +728,7 @@ void SyncSession::handle_error(sync::SessionErrorInfo error)
// Original config was PBS, rollback the migration
m_migration_store->rollback_to_pbs();
save_sync_config_after_migration_or_rollback();
download_fresh_realm(error.server_requests_action);
download_fresh_realm(error);
return;
case sync::ProtocolErrorInfo::Action::RefreshUser:
if (auto u = user()) {
Expand Down Expand Up @@ -824,17 +831,15 @@ void SyncSession::handle_progress_update(uint64_t downloaded, uint64_t downloada
upload_estimate, query_version);
}

static sync::Session::Config::ClientReset make_client_reset_config(const RealmConfig& base_config,
const std::shared_ptr<SyncConfig>& sync_config,
DBRef&& fresh_copy, bool recovery_is_allowed,
bool schema_migration_detected)

static sync::Session::Config::ClientReset
make_client_reset_config(const RealmConfig& base_config, const std::shared_ptr<SyncConfig>& sync_config,
DBRef&& fresh_copy, sync::SessionErrorInfo&& error_info, bool schema_migration_detected)
{
REALM_ASSERT(sync_config->client_resync_mode != ClientResyncMode::Manual);

sync::Session::Config::ClientReset config;
config.mode = sync_config->client_resync_mode;
config.fresh_copy = std::move(fresh_copy);
config.recovery_is_allowed = recovery_is_allowed;
sync::Session::Config::ClientReset config{sync_config->client_resync_mode, std::move(fresh_copy),
std::move(error_info.status), error_info.server_requests_action};

// The conditions here are asymmetric because if we have *either* a before
// or after callback we need to make sure to initialize the local schema
Expand Down Expand Up @@ -940,17 +945,15 @@ void SyncSession::create_sync_session()
}
session_config.custom_http_headers = sync_config.custom_http_headers;

if (m_server_requests_action != sync::ProtocolErrorInfo::Action::NoAction) {
// Migrations are allowed to recover local data.
const bool allowed_to_recover = m_server_requests_action == sync::ProtocolErrorInfo::Action::ClientReset ||
m_server_requests_action == sync::ProtocolErrorInfo::Action::MigrateToFLX ||
m_server_requests_action == sync::ProtocolErrorInfo::Action::RevertToPBS;
// Use the original sync config, not the updated one from the migration store
session_config.client_reset_config =
make_client_reset_config(m_config, m_original_sync_config, std::move(m_client_reset_fresh_copy),
allowed_to_recover, m_previous_schema_version.has_value());
session_config.schema_version = m_previous_schema_version.value_or(m_config.schema_version);
m_server_requests_action = sync::ProtocolErrorInfo::Action::NoAction;
if (m_client_reset_error) {
auto client_reset_error = std::exchange(m_client_reset_error, std::nullopt);
if (client_reset_error->server_requests_action != sync::ProtocolErrorInfo::Action::NoAction) {
// Use the original sync config, not the updated one from the migration store
session_config.client_reset_config =
make_client_reset_config(m_config, m_original_sync_config, std::move(m_client_reset_fresh_copy),
std::move(*client_reset_error), m_previous_schema_version.has_value());
session_config.schema_version = m_previous_schema_version.value_or(m_config.schema_version);
}
}

session_config.progress_handler = [weak_self](uint_fast64_t downloaded, uint_fast64_t downloadable,
Expand Down
10 changes: 4 additions & 6 deletions src/realm/object-store/sync/sync_session.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <realm/object-store/feature_checks.hpp>
#include <realm/object-store/shared_realm.hpp>
#include <realm/object-store/sync/generic_network_transport.hpp>
#include <realm/sync/client_base.hpp>
#include <realm/sync/config.hpp>
#include <realm/sync/subscriptions.hpp>

Expand All @@ -40,7 +41,6 @@ class SyncUser;

namespace sync {
class Session;
struct SessionErrorInfo;
class MigrationStore;
} // namespace sync

Expand Down Expand Up @@ -406,10 +406,9 @@ class SyncSession : public std::enable_shared_from_this<SyncSession> {
void apply_sync_config_after_migration_or_rollback() REQUIRES(!m_config_mutex, !m_state_mutex);
void save_sync_config_after_migration_or_rollback() REQUIRES(!m_config_mutex);

void download_fresh_realm(sync::ProtocolErrorInfo::Action server_requests_action)
void download_fresh_realm(const sync::SessionErrorInfo& error_info)
REQUIRES(!m_config_mutex, !m_state_mutex, !m_connection_state_mutex);
void handle_fresh_realm_downloaded(DBRef db, Status status,
sync::ProtocolErrorInfo::Action server_requests_action,
void handle_fresh_realm_downloaded(DBRef db, StatusWith<sync::SessionErrorInfo> error_info,
std::optional<sync::SubscriptionSet> new_subs = std::nullopt)
REQUIRES(!m_state_mutex, !m_config_mutex, !m_connection_state_mutex);
void handle_error(sync::SessionErrorInfo) REQUIRES(!m_state_mutex, !m_config_mutex, !m_connection_state_mutex);
Expand Down Expand Up @@ -504,8 +503,7 @@ class SyncSession : public std::enable_shared_from_this<SyncSession> {
std::shared_ptr<SyncConfig> m_migrated_sync_config GUARDED_BY(m_config_mutex);
const std::shared_ptr<sync::MigrationStore> m_migration_store;
std::optional<int64_t> m_migration_sentinel_query_version GUARDED_BY(m_state_mutex);
sync::ProtocolErrorInfo::Action
m_server_requests_action GUARDED_BY(m_state_mutex) = sync::ProtocolErrorInfo::Action::NoAction;
std::optional<sync::SessionErrorInfo> m_client_reset_error GUARDED_BY(m_state_mutex);
DBRef m_client_reset_fresh_copy GUARDED_BY(m_state_mutex);
_impl::SyncClient& m_client;
SyncManager* m_sync_manager GUARDED_BY(m_state_mutex) = nullptr;
Expand Down
1 change: 1 addition & 0 deletions src/realm/sync/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ set(SYNC_SOURCES
noinst/compact_changesets.cpp
noinst/migration_store.cpp
noinst/pending_bootstrap_store.cpp
noinst/pending_reset_store.cpp
noinst/protocol_codec.cpp
noinst/sync_metadata_schema.cpp
noinst/sync_schema_migration.cpp
Expand Down
Loading

0 comments on commit 25212bc

Please sign in to comment.