From 7820ded8d15355190bd1d8e6d9d93113eb61009a Mon Sep 17 00:00:00 2001 From: Kirill Burtsev Date: Fri, 20 Oct 2023 10:58:13 +0200 Subject: [PATCH] Fix blocked DB::open on multiprocess access on exFAT filesystem (#6959) Fix double file lock and DB::open being blocked with multiple concurrent realm access on fat32/exfat file systems. When file is truncated on fat32/exfat its uid is available for other files. With multiple processes opening and truncating the same set of files could lead to the situation when within one process get_unique_id will return the same value for different files in timing is right. This breaks proper initialization of static data for interprocess mutexes, so that subsequent locks will hang by trying to lock essentially the same file twice. --- CHANGELOG.md | 3 +- src/realm/util/interprocess_mutex.hpp | 10 +++-- test/test_file.cpp | 13 ++++++ test/test_shared.cpp | 64 +++++++++++++++++++++++++++ test/util/test_path.cpp | 14 ++++-- test/util/test_path.hpp | 9 +++- 6 files changed, 104 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3341966a461..16d31193fdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ * `SyncManager::path_for_realm()` would return `//filename.realm.realm` if `custom_file_name` was set to `filename.realm` and the file didn't exist. It would correctly return `//filename.realm` if the file already existed. After this fix `//filename.realm` is returned in all cases. ([#7038](https://github.com/realm/realm-core/issues/7038)) * Fixed a bug preventing SSL handshake from completing successfuly due to failed hostname verification when linking against BoringSSL. (PR [#7034](https://github.com/realm/realm-core/pull/7034)) * Updating subscriptions did not trigger Realm autorefreshes, sometimes resulting in async refresh hanging until another write was performed by something else ([PR #7031](https://github.com/realm/realm-core/pull/7031)). +* Fix interprocess locking for concurrent realm file access resulting in a interprocess deadlock on FAT32/exFAT filesystems ([PR #6959](https://github.com/realm/realm-core/pull/6959)). ### Breaking changes * None. @@ -49,7 +50,7 @@ ### Enhancements * Allow collections of non-embedded links in asymmetric objects. ([PR #7003](https://github.com/realm/realm-core/pull/7003)) * Flexible sync API improvements: - - Erase Subscriptions by class type for C API. + - Erase Subscriptions by class type for C API. - `MutableSubscriptionSet::erase(iterator)` now runs in constant time. - Introduce `MutableSubscriptionSet::erase_by_id()`. - Introduce `MutableSubscriptionSet::erase_by_class_name()`. diff --git a/src/realm/util/interprocess_mutex.hpp b/src/realm/util/interprocess_mutex.hpp index a9354af5143..6702d4b8906 100644 --- a/src/realm/util/interprocess_mutex.hpp +++ b/src/realm/util/interprocess_mutex.hpp @@ -252,19 +252,23 @@ inline void InterprocessMutex::set_shared_part(SharedPart& shared_part, const st if (result != s_info_map->end()) { // File exists and the lock info has been created in the map. m_lock_info = result->second.lock(); + REALM_ASSERT_RELEASE(m_lock_info && m_lock_info->m_file.get_path() == m_filename); return; } } // LockInfo has not been created yet. m_lock_info = std::make_shared(); - // Always use mod_Write to open file and retreive the uid in case other process - // deletes the file. - m_lock_info->m_file.open(m_filename, File::mode_Write); + // Always open file for write and retreive the uid in case other process + // deletes the file. Avoid using just mode_Write (which implies truncate). + // On fat32/exfat uid could be reused by OS in a situation when + // multiple processes open and truncate the same lock file concurrently. + m_lock_info->m_file.open(m_filename, File::mode_Append); // exFAT does not allocate a unique id for the file until it's non-empty m_lock_info->m_file.resize(1); m_fileuid = m_lock_info->m_file.get_unique_id(); + REALM_ASSERT_RELEASE(s_info_map->find(m_fileuid) == s_info_map->end()); (*s_info_map)[m_fileuid] = m_lock_info; #elif defined(_WIN32) if (m_handle) { diff --git a/test/test_file.cpp b/test/test_file.cpp index c129dd2630b..5034b012c86 100644 --- a/test/test_file.cpp +++ b/test/test_file.cpp @@ -575,6 +575,19 @@ TEST(File_GetUniqueID) uid4_1 = uid4_2; CHECK_NOT(uid4_1 < uid4_2); CHECK_NOT(uid4_2 < uid4_1); + + file1_1.resize(0); + file2_1.resize(0); + file2_1.resize(1); + file1_1.resize(1); + if (!test_util::test_dir_is_exfat()) { + CHECK(uid1_1 == file1_1.get_unique_id()); + CHECK(uid2_1 == file2_1.get_unique_id()); + } + else { + CHECK(uid1_1 == file2_1.get_unique_id()); + CHECK(uid2_1 == file1_1.get_unique_id()); + } } TEST(File_Temp) diff --git a/test/test_shared.cpp b/test/test_shared.cpp index ad52a6cdc08..18175930625 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ #include "test.hpp" #include "test_table_helper.hpp" +#include "util/spawned_process.hpp" extern unsigned int unit_test_random_seed; @@ -4345,4 +4347,66 @@ TEST(Shared_WriteToFail) CHECK(*tr == *dest); } +NONCONCURRENT_TEST_IF(Shared_LockFileConcurrentInit, testing_supports_spawn_process) +{ + auto path = realm::test_util::get_test_path(test_context.get_test_name(), ".test-dir"); + test_util::TestDirGuard test_dir(path, false); + test_dir.do_remove = SpawnedProcess::is_parent(); + auto lock_prefix = std::string(path) + "/lock"; + + struct Lock { + std::unique_ptr mutex; + std::unique_ptr sp; + + Lock(const std::string& name, const std::string& lock_prefix_path) + : mutex(std::make_unique()) + , sp(std::make_unique()) + { + mutex->set_shared_part(*sp, lock_prefix_path, name); + } + + Lock(Lock&&) = default; + Lock& operator=(Lock&&) = default; + }; + + for (size_t i = 0; i < 10; ++i) { + std::vector> spawned; + + // create multiple processes initializing multiple same purpose locks + for (size_t j = 0; j < 10; ++j) { + spawned.emplace_back( + test_util::spawn_process(test_context.test_details.test_name, util::format("child [%1]", i))); + + if (spawned.back()->is_child()) { + std::vector locks; + + // mimic the same impl detail as in DB and hope it'd trigger some assertions + for (auto tag : {"write", "control", "versions"}) { + locks.emplace_back(tag, lock_prefix); + CHECK(locks.back().mutex->is_valid()); + } + + // if somehow initialization is scrambled or there is an issues with + // underlying files then it should hang here + for (int k = 0; k < 3; ++k) { + for (auto&& lock : locks) + lock.mutex->lock(); + for (auto&& lock : locks) + lock.mutex->unlock(); + } + + exit(0); + } + } + + if (SpawnedProcess::is_parent()) { + for (auto&& process : spawned) + process->wait_for_child_to_finish(); + + // start everytime with no lock files for mutexes + test_dir.clean_dir(); + } + } +} + #endif // TEST_SHARED diff --git a/test/util/test_path.cpp b/test/util/test_path.cpp index bca0afd988d..ea1c70ed7d6 100644 --- a/test/util/test_path.cpp +++ b/test/util/test_path.cpp @@ -175,7 +175,8 @@ bool test_dir_is_exfat() // f_type or provide constants for them std::string fs_typename = fsbuf.f_fstypename; std::transform(fs_typename.begin(), fs_typename.end(), fs_typename.begin(), toLowerAscii); - return fs_typename.find(std::string("exfat")) != std::string::npos; + return fs_typename.find(std::string("exfat")) != std::string::npos || + fs_typename.find(std::string("msdos")) != std::string::npos; #else return false; #endif @@ -229,11 +230,12 @@ TestPathGuard& TestPathGuard::operator=(TestPathGuard&& other) noexcept } -TestDirGuard::TestDirGuard(const std::string& path) +TestDirGuard::TestDirGuard(const std::string& path, bool init_clean) : m_path(path) { if (!try_make_dir(path)) { - clean_dir(path); + if (init_clean) + clean_dir(path); } } @@ -241,6 +243,10 @@ TestDirGuard::~TestDirGuard() noexcept { if (g_keep_files) return; + + if (!do_remove) + return; + try { clean_dir(m_path); remove_dir(m_path); @@ -265,7 +271,7 @@ void do_clean_dir(const std::string& path, const std::string& guard_string) // Try to avoid accidental removal of precious files due to bugs in // TestDirGuard or TEST_DIR macro. if (subpath.find(guard_string) == std::string::npos) - throw std::runtime_error("Bad test dir path"); + throw std::runtime_error("Bad test dir path: " + path + ", guard: " + guard_string); File::remove(subpath); } } diff --git a/test/util/test_path.hpp b/test/util/test_path.hpp index b95f8b6e4fd..651ae2d4bfe 100644 --- a/test/util/test_path.hpp +++ b/test/util/test_path.hpp @@ -120,7 +120,7 @@ class TestPathGuard { /// directory, then removes the directory. class TestDirGuard { public: - TestDirGuard(const std::string& path); + TestDirGuard(const std::string& path, bool init_clean = true); ~TestDirGuard() noexcept; operator std::string() const { @@ -131,6 +131,13 @@ class TestDirGuard { return m_path.c_str(); } + bool do_remove = true; + + void clean_dir() + { + clean_dir(m_path); + } + private: std::string m_path; void clean_dir(const std::string& path);