From 3e975fe75d251eb0de2e04678e4fa48b67e06df1 Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Sat, 18 Nov 2023 08:58:12 -0800 Subject: [PATCH] Fix client reset cycle detection for PBS recovery errors Tracking that a client reset was in progress was done in the same write transaction as the recovery operation, so if recovery failed the tracking was rolled back too. This worked for FLX due to that codepath committing before beginning recovery. --- CHANGELOG.md | 1 + src/realm/sync/noinst/client_reset.cpp | 2 ++ test/object-store/sync/client_reset.cpp | 27 +++++++++++++++++++------ test/test_client_reset.cpp | 13 +++++++----- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 870b89cc440..a542d4fab9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?) * `Set::assign_intersection()` on `Set`, `Set`, and `Set` containing string or binary would cause a use-after-free if a set was intersected with itself ([PR #7144](https://github.com/realm/realm-core/pull/7144), since v10.0.0). * Set algebra on `Set` and `Set` gave incorrect results when used on platforms where `char` is signed ([#7135](https://github.com/realm/realm-core/issues/7135), since v13.23.3). +* Errors encountered while reapplying local changes for client reset recovery on partition-based sync Realms would result in the client reset attempt not being recorded, possibly resulting in an endless loop of attempting and failing to automatically recover the client reset. Flexible sync and errors from the server after completing the local recovery were handled correctly ([PR #7149](https://github.com/realm/realm-core/pull/7149), since v10.2.0). ### Breaking changes * None. diff --git a/src/realm/sync/noinst/client_reset.cpp b/src/realm/sync/noinst/client_reset.cpp index 0ff45279c1e..96e8e07f4a7 100644 --- a/src/realm/sync/noinst/client_reset.cpp +++ b/src/realm/sync/noinst/client_reset.cpp @@ -496,6 +496,8 @@ void track_reset(Transaction& wt, ClientResyncMode mode) {{version_col, metadata_version}, {timestamp_col, Timestamp(std::chrono::system_clock::now())}, {type_col, mode_val}}); + + wt.commit_and_continue_writing(); } static ClientResyncMode reset_precheck_guard(Transaction& wt, ClientResyncMode mode, bool recovery_is_allowed, diff --git a/test/object-store/sync/client_reset.cpp b/test/object-store/sync/client_reset.cpp index bc23a0d0efb..83bd548e170 100644 --- a/test/object-store/sync/client_reset.cpp +++ b/test/object-store/sync/client_reset.cpp @@ -1592,9 +1592,8 @@ TEST_CASE("sync: client reset", "[sync][pbs][client reset][baas]") { SECTION("a normal reset adds and removes a cycle detection flag") { local_config.sync_config->client_resync_mode = ClientResyncMode::RecoverOrDiscard; local_config.sync_config->notify_before_client_reset = [&](SharedRealm realm) { - auto flag = has_reset_cycle_flag(realm); - REQUIRE(!flag); - std::lock_guard lock(mtx); + REQUIRE_FALSE(has_reset_cycle_flag(realm)); + std::lock_guard lock(mtx); ++before_callback_invocations; }; local_config.sync_config->notify_after_client_reset = [&](SharedRealm, ThreadSafeReference realm_ref, @@ -1604,19 +1603,35 @@ TEST_CASE("sync: client reset", "[sync][pbs][client reset][baas]") { REQUIRE(bool(flag)); REQUIRE(flag->type == ClientResyncMode::Recover); REQUIRE(did_recover); - std::lock_guard lock(mtx); + std::lock_guard lock(mtx); ++after_callback_invocations; }; make_reset(local_config, remote_config) ->on_post_local_changes([&](SharedRealm realm) { - auto flag = has_reset_cycle_flag(realm); - REQUIRE(!flag); + REQUIRE_FALSE(has_reset_cycle_flag(realm)); }) ->run(); REQUIRE(!err); REQUIRE(before_callback_invocations == 1); REQUIRE(after_callback_invocations == 1); } + + SECTION("a failed reset leaves a cycle detection flag") { + local_config.sync_config->client_resync_mode = ClientResyncMode::Recover; + make_reset(local_config, remote_config) + ->make_local_changes([](SharedRealm realm) { + auto table = realm->read_group().get_table("class_object"); + table->remove_column(table->add_column(type_Int, "new col")); + }) + ->run(); + local_config.sync_config.reset(); + local_config.force_sync_history = true; + auto realm = Realm::get_shared_realm(local_config); + auto flag = has_reset_cycle_flag(realm); + REQUIRE(flag); + CHECK(flag->type == ClientResyncMode::Recover); + } + SECTION("In DiscardLocal mode: a previous failed discard reset is detected and generates an error") { local_config.sync_config->client_resync_mode = ClientResyncMode::DiscardLocal; make_fake_previous_reset(ClientResyncMode::DiscardLocal); diff --git a/test/test_client_reset.cpp b/test/test_client_reset.cpp index 663d2a78969..fa233e8fe2b 100644 --- a/test/test_client_reset.cpp +++ b/test/test_client_reset.cpp @@ -771,8 +771,9 @@ TEST(ClientReset_DoNotRecoverSchema) CHECK(!compare_groups(rt_1, rt_2)); const Group& group = rt_1.get_group(); - CHECK_EQUAL(group.size(), 1); + CHECK_EQUAL(group.size(), 2); CHECK(group.get_table("class_table1")); + CHECK(group.get_table("client_reset_metadata")); CHECK_NOT(group.get_table("class_table2")); const Group& group2 = rt_2.get_group(); CHECK_EQUAL(group2.size(), 1); @@ -867,8 +868,9 @@ void expect_reset(unit_test::TestContext& test_context, DB& target, DB& fresh, C CHECK_NOT(fresh.is_attached()); CHECK_NOT(util::File::exists(fresh_path)); - // Should have performed exactly one write on the target DB - CHECK_EQUAL(target.get_version_of_latest_snapshot(), db_version + 1); + // Should have performed exactly two writes on the target DB: one to track + // that we're attempting recovery, and one with the actual reset + CHECK_EQUAL(target.get_version_of_latest_snapshot(), db_version + 2); // Should have set the client file ident CHECK_EQUAL(target.start_read()->get_sync_file_id(), 100); @@ -895,8 +897,9 @@ void expect_reset(unit_test::TestContext& test_context, DB& target, DB& fresh, C CHECK_NOT(fresh.is_attached()); CHECK_NOT(util::File::exists(fresh_path)); - // Should have performed exactly one write on the target DB - CHECK_EQUAL(target.get_version_of_latest_snapshot(), db_version + 1); + // Should have performed exactly two writes on the target DB: one to track + // that we're attempting recovery, and one with the actual reset + CHECK_EQUAL(target.get_version_of_latest_snapshot(), db_version + 2); // Should have set the client file ident CHECK_EQUAL(target.start_read()->get_sync_file_id(), 100);