diff --git a/Source/Common/RootFSSetup.cpp b/Source/Common/RootFSSetup.cpp
index ffe8a89337..4fe4ebfb3c 100644
--- a/Source/Common/RootFSSetup.cpp
+++ b/Source/Common/RootFSSetup.cpp
@@ -155,14 +155,13 @@ bool SendSocketPipe(std::string const &MountPath) {
   int Result = ppoll(&pfd, 1, &ts, nullptr);
   if (Result == -1 || Result == 0) {
     // didn't get ack back in time
-    // Close our read pipe
-    close(fds[0]);
-    // close our write pipe
-    close(fds[1]);
-
-    // close socket
-    close(socket_fd);
-    return false;
+    // Assume an overburdened system at this point
+    // If the FEXMountDaemon is alive but slept for more than our timeout
+    // then we can spuriously throw errors
+    //
+    // Returning false here would cause FEX to try and spin up a new FEXMountDaemon
+    // and then the FEXMountDaemon would check to see if the lock exists
+    // Then would early exit and not mount a new path
   }
 
   // We've sent the message which means we're done with the socket
diff --git a/Source/Tools/FEXMountDaemon/Main.cpp b/Source/Tools/FEXMountDaemon/Main.cpp
index dc36c89847..2b1008e03b 100644
--- a/Source/Tools/FEXMountDaemon/Main.cpp
+++ b/Source/Tools/FEXMountDaemon/Main.cpp
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -35,7 +36,13 @@ namespace EPollWatcher {
   static int epoll_fd{};
   static std::thread EPollThread{};
   static std::atomic EPollThreadTID{};
+  // RLIMIT_NOFILE soft/hard limits as read by GetMaxFDs() and updated by RaiseFDLimit()
+  static rlimit MaxFDs{};
+  // AddPipeToWatch() tries to raise the soft limit once the tracked FD count is within this distance of it
+  static constexpr size_t MAX_FD_DISTANCE = 32;
   std::atomic NumPipesWatched{};
+  // Open FD count sampled by GetMaxFDs()
+  std::atomic NumFilesOpened{};
   std::atomic EPollWatcherShutdown {false};
   std::chrono::time_point TimeWhileZeroFDs{};
   // Timeout is ten seconds
@@ -45,6 +52,64 @@ namespace EPollWatcher {
     return NumPipesWatched.load();
   }
 
+  // Counts this process's currently open file descriptors.
+  size_t GetNumFilesOpen() {
+    // Walk /proc/self/fd/ to see how many open files we currently have
+    const std::filesystem::path self{"/proc/self/fd/"};
+
+    return std::distance(std::filesystem::directory_iterator{self}, std::filesystem::directory_iterator{});
+  }
+
+  // Caches the RLIMIT_NOFILE soft and hard limits in MaxFDs and stores the
+  // current open FD count in NumFilesOpened.
+  void GetMaxFDs() {
+    // Get our kernel limit for the number of open files
+    if (getrlimit(RLIMIT_NOFILE, &MaxFDs) != 0) {
+      fprintf(stderr, "[FEXMountDaemon] getrlimit(RLIMIT_NOFILE) returned error %d %s\n", errno, strerror(errno));
+
+      // The limit is unknown, so treat it as unlimited rather than leaving the
+      // zero-initialized values to drive the "close to the limit" check
+      MaxFDs.rlim_cur = RLIM_INFINITY;
+      MaxFDs.rlim_max = RLIM_INFINITY;
+    }
+
+    // Walk /proc/self/fd/ to see how many open files we currently have
+    NumFilesOpened = GetNumFilesOpen();
+  }
+
+  // Doubles the soft RLIMIT_NOFILE limit, clamped to the hard limit.
+  // Only logs, leaving MaxFDs untouched, when the soft limit already equals
+  // the hard limit or when setrlimit() fails.
+  void RaiseFDLimit() {
+    if (MaxFDs.rlim_cur == MaxFDs.rlim_max) {
+      fprintf(stderr, "[FEXMountDaemon] Our open FD limit is already set to max and we are wanting to increase it\n");
+      fprintf(stderr, "[FEXMountDaemon] FEXMountDaemon will now no longer be able to track new instances of FEX\n");
+      // rlim_t and size_t are unsigned, so print them with %llu/%zu instead of the signed %zd
+      fprintf(stderr, "[FEXMountDaemon] Current limit is %llu(hard %llu) FDs and we are at %zu\n", static_cast<unsigned long long>(MaxFDs.rlim_cur), static_cast<unsigned long long>(MaxFDs.rlim_max), GetNumFilesOpen());
+      fprintf(stderr, "[FEXMountDaemon] Ask your administrator to raise your kernel's hard limit on open FDs\n");
+      return;
+    }
+
+    rlimit NewLimit = MaxFDs;
+
+    // Double the soft limit, saturating at the hard max.
+    // Checking against half of the hard max first keeps the doubling from wrapping around.
+    if (NewLimit.rlim_cur > NewLimit.rlim_max / 2) {
+      NewLimit.rlim_cur = NewLimit.rlim_max;
+    }
+    else {
+      NewLimit.rlim_cur *= 2;
+    }
+
+    if (setrlimit(RLIMIT_NOFILE, &NewLimit) != 0) {
+      fprintf(stderr, "[FEXMountDaemon] Couldn't raise FD limit to %llu even though our hard limit is %llu\n", static_cast<unsigned long long>(NewLimit.rlim_cur), static_cast<unsigned long long>(NewLimit.rlim_max));
+    }
+    else {
+      // Set the new limit
+      MaxFDs = NewLimit;
+    }
+  }
+
   void AddPipeToWatch(int pipe) {
     struct epoll_event evt{};
     evt.events = EPOLLERR; // This event will return when the read end of a pipe is closed
@@ -55,11 +120,25 @@ namespace EPollWatcher {
     }
     else {
       ++NumPipesWatched;
+
+      // Adding the headroom to the left-hand side avoids unsigned wrap-around
+      // when the soft limit is smaller than MAX_FD_DISTANCE
+      if ((NumPipesWatched + NumFilesOpened + MAX_FD_DISTANCE) >= MaxFDs.rlim_cur) {
+        // We are close to the maximum FD distance
+        // Try to raise the limit
+        RaiseFDLimit();
+      }
     }
   }
 
   void RemovePipeToWatch(int pipe) {
     int Result = epoll_ctl(epoll_fd, EPOLL_CTL_DEL, pipe, nullptr);
+    // Keep epoll_ctl's errno for the report below; close() may overwrite it
+    const int EPollErrno = errno;
+
+    // Make sure to close the pipe to not leak the FD
+    close(pipe);
+    errno = EPollErrno;
     if (Result == -1) {
       fprintf(stderr, "[FEXMountDaemon] epoll_ctl returned error %d %s\n", errno, strerror(errno));
     }
@@ -81,7 +160,6 @@ namespace EPollWatcher {
 
     // Spin while we are not shutting down
-    // Also spin while we have pipes to watch
-    while (!EPollWatcherShutdown.load() || NumPipesRemaining() != 0) {
+    while (!EPollWatcherShutdown.load()) {
       // Loop every ten seconds
       // epoll_pwait2 only available since kernel 5.11...
       int Result = epoll_pwait(epoll_fd, Events, MAX_EVENTS, 10 * 1000, nullptr);
@@ -115,6 +193,8 @@ namespace EPollWatcher {
   }
 
   void SetupEPoll() {
+    // Cache the RLIMIT_NOFILE limits and the current open FD count
+    GetMaxFDs();
     epoll_fd = epoll_create1(EPOLL_CLOEXEC);
     EPollThread = std::thread{EPollWatcher::EPollWatch};
   }