Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit

Permalink
[Impeller] Test FenceWaiterVK and fix termination bugs (#45870)
Browse files Browse the repository at this point in the history
Fixes flutter/flutter#134751, and @jonahwilliams suspects it could be related to a number of other flaky/texture leak scenarios (flutter/flutter#133506 (comment)) that only happen sometimes (i.e. on CI but not real devices), i.e. stuff like:

```txt
--- Vulkan Debug Report  ----------------------------------------
|                Severity: Error
|                    Type: { Validation }
|                 ID Name: VUID-vkDestroyBuffer-buffer-00922
|               ID Number: -464217071
|       Queue Breadcrumbs: [NONE]
|  CMD Buffer Breadcrumbs: [NONE]
|         Related Objects: Device [94498356231456] [ImpellerDevice]
|                 Trigger: Validation Error: [ VUID-vkDestroyBuffer-buffer-00922 ] Object 0: handle = 0x55f21cf47d20, name = ImpellerDevice, type = VK_OBJECT_TYPE_DEVICE; | MessageID = 0xe4549c11 | Cannot call vkDestroyBuffer on VkBuffer 0xbb00000000bb[] that is currently in use by a command buffer. The Vulkan spec states: All submitted commands that refer to buffer, either directly or via a VkBufferView, must have completed execution (https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#VUID-vkDestroyBuffer-buffer-00922)
-----------------------------------------------------------------
```

---

~~This PR will look a bit like a mess until the last 2 PRs merge in, but locally it appears to fix fence races/segfaults that I was seeing on CI, including on Linux with validations enabled. We can test it further tomorrow.~~ EDIT: Updated.
  • Loading branch information
matanlurey authored and harryterkelsen committed Oct 23, 2023
1 parent 94464bf commit a0055e5
Show file tree
Hide file tree
Showing 9 changed files with 305 additions and 77 deletions.
1 change: 1 addition & 0 deletions ci/licenses_golden/excluded_files
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@
../../../flutter/impeller/renderer/backend/vulkan/blit_command_vk_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/command_encoder_vk_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/context_vk_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/fence_waiter_vk_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/pass_bindings_cache_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/resource_manager_vk_unittests.cc
../../../flutter/impeller/renderer/backend/vulkan/test
Expand Down
1 change: 1 addition & 0 deletions impeller/renderer/backend/vulkan/BUILD.gn
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ impeller_component("vulkan_unittests") {
"blit_command_vk_unittests.cc",
"command_encoder_vk_unittests.cc",
"context_vk_unittests.cc",
"fence_waiter_vk_unittests.cc",
"pass_bindings_cache_unittests.cc",
"resource_manager_vk_unittests.cc",
"test/mock_vulkan.cc",
Expand Down
4 changes: 0 additions & 4 deletions impeller/renderer/backend/vulkan/context_vk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -377,10 +377,6 @@ void ContextVK::Setup(Settings settings) {
///
auto fence_waiter =
std::shared_ptr<FenceWaiterVK>(new FenceWaiterVK(device_holder));
if (!fence_waiter->IsValid()) {
VALIDATION_LOG << "Could not create fence waiter.";
return;
}

//----------------------------------------------------------------------------
/// Create the resource manager.
Expand Down
165 changes: 95 additions & 70 deletions impeller/renderer/backend/vulkan/fence_waiter_vk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <algorithm>
#include <chrono>
#include <utility>

#include "flutter/fml/thread.h"
#include "flutter/fml/trace_event.h"
Expand Down Expand Up @@ -47,28 +48,25 @@ class WaitSetEntry {
FenceWaiterVK::FenceWaiterVK(std::weak_ptr<DeviceHolder> device_holder)
: device_holder_(std::move(device_holder)) {
waiter_thread_ = std::make_unique<std::thread>([&]() { Main(); });
is_valid_ = true;
}

FenceWaiterVK::~FenceWaiterVK() {
Terminate();
if (waiter_thread_) {
waiter_thread_->join();
}
}

bool FenceWaiterVK::IsValid() const {
return is_valid_;
waiter_thread_->join();
}

bool FenceWaiterVK::AddFence(vk::UniqueFence fence,
const fml::closure& callback) {
TRACE_EVENT0("flutter", "FenceWaiterVK::AddFence");
if (!IsValid() || !fence || !callback) {
if (!fence || !callback) {
return false;
}
{
// Maintain the invariant that terminate_ is accessed only under the lock.
std::scoped_lock lock(wait_set_mutex_);
if (terminate_) {
return false;
}
wait_set_.emplace_back(WaitSetEntry::Create(std::move(fence), callback));
}
wait_set_cv_.notify_one();
Expand All @@ -92,85 +90,112 @@ void FenceWaiterVK::Main() {
using namespace std::literals::chrono_literals;

while (true) {
std::unique_lock lock(wait_set_mutex_);

// If there are no fences to wait on, wait on the condition variable.
wait_set_cv_.wait(lock, [&]() { return !wait_set_.empty() || terminate_; });
// We'll read the terminate_ flag within the lock below.
bool terminate = false;

// We don't want to check on fence status or collect wait set entries in the
// critical section. Copy the array of entries and immediately unlock the
// mutex.
WaitSet wait_set = wait_set_;
{
std::unique_lock lock(wait_set_mutex_);

const auto terminate = terminate_;
// If there are no fences to wait on, wait on the condition variable.
wait_set_cv_.wait(lock,
[&]() { return !wait_set_.empty() || terminate_; });

lock.unlock();
// Still under the lock, check if the waiter has been terminated.
terminate = terminate_;
}

if (terminate) {
WaitUntilEmpty();
break;
}

// Check if the context had died in the meantime.
auto device_holder = device_holder_.lock();
if (!device_holder) {
if (!Wait()) {
break;
}
}
}

const auto& device = device_holder->GetDevice();
void FenceWaiterVK::WaitUntilEmpty() {
// Note, there is no lock because once terminate_ is set to true, no other
// fence can be added to the wait set. Just in case, here's a FML_DCHECK:
FML_DCHECK(terminate_) << "Fence waiter must be terminated.";
while (!wait_set_.empty() && Wait()) {
// Intentionally empty.
}
}

// Wait for one or more fences to be signaled. Any additional fences added
// to the waiter will be serviced in the next pass. If a fence that is going
// to be signaled at an abnormally long deadline is the only one in the set,
// a timeout will bail out the wait.
auto fences = GetFencesForWaitSet(wait_set);
if (fences.empty()) {
continue;
}
bool FenceWaiterVK::Wait() {
// Snapshot the wait set and wait on the fences.
WaitSet wait_set;
{
std::scoped_lock lock(wait_set_mutex_);
wait_set = wait_set_;
}

auto result = device.waitForFences(
fences.size(), // fences count
fences.data(), // fences
false, // wait for all
std::chrono::nanoseconds{100ms}.count() // timeout (ns)
);
if (!(result == vk::Result::eSuccess || result == vk::Result::eTimeout)) {
VALIDATION_LOG << "Fence waiter encountered an unexpected error. Tearing "
"down the waiter thread.";
break;
}
using namespace std::literals::chrono_literals;

// One or more fences have been signaled. Find out which ones and update
// their signaled statuses.
{
TRACE_EVENT0("impeller", "CheckFenceStatus");
for (auto& entry : wait_set) {
entry->UpdateSignalledStatus(device);
}
wait_set.clear();
}
// Check if the context had died in the meantime.
auto device_holder = device_holder_.lock();
if (!device_holder) {
return false;
}

// Quickly acquire the wait set lock and erase signaled entries. Make sure
// the mutex is unlocked before calling the destructors of the erased
// entries. These might touch allocators.
WaitSet erased_entries;
{
static auto is_signalled = [](const auto& entry) {
return entry->IsSignalled();
};
std::scoped_lock lock(wait_set_mutex_);
std::copy_if(wait_set_.begin(), wait_set_.end(),
std::back_inserter(erased_entries), is_signalled);
wait_set_.erase(
std::remove_if(wait_set_.begin(), wait_set_.end(), is_signalled),
wait_set_.end());
}
const auto& device = device_holder->GetDevice();
// Wait for one or more fences to be signaled. Any additional fences added
// to the waiter will be serviced in the next pass. If a fence that is going
// to be signaled at an abnormally long deadline is the only one in the set,
// a timeout will bail out the wait.
auto fences = GetFencesForWaitSet(wait_set);
if (fences.empty()) {
return true;
}

{
TRACE_EVENT0("impeller", "ClearSignaledFences");
// Erase the erased entries which will invoke callbacks.
erased_entries.clear(); // Bit redundant because of scope but hey.
auto result = device.waitForFences(
/*fenceCount=*/fences.size(),
/*pFences=*/fences.data(),
/*waitAll=*/false,
/*timeout=*/std::chrono::nanoseconds{100ms}.count());
if (!(result == vk::Result::eSuccess || result == vk::Result::eTimeout)) {
VALIDATION_LOG << "Fence waiter encountered an unexpected error. Tearing "
"down the waiter thread.";
return false;
}

// One or more fences have been signaled. Find out which ones and update
// their signaled statuses.
{
TRACE_EVENT0("impeller", "CheckFenceStatus");
for (auto& entry : wait_set) {
entry->UpdateSignalledStatus(device);
}
wait_set.clear();
}

// Quickly acquire the wait set lock and erase signaled entries. Make sure
// the mutex is unlocked before calling the destructors of the erased
// entries. These might touch allocators.
WaitSet erased_entries;
{
static constexpr auto is_signalled = [](const auto& entry) {
return entry->IsSignalled();
};
std::scoped_lock lock(wait_set_mutex_);

// TODO(matanlurey): Iterate the list 1x by copying is_signaled into erased.
std::copy_if(wait_set_.begin(), wait_set_.end(),
std::back_inserter(erased_entries), is_signalled);
wait_set_.erase(
std::remove_if(wait_set_.begin(), wait_set_.end(), is_signalled),
wait_set_.end());
}

{
TRACE_EVENT0("impeller", "ClearSignaledFences");
// Erase the erased entries which will invoke callbacks.
erased_entries.clear(); // Bit redundant because of scope but hey.
}

return true;
}

void FenceWaiterVK::Terminate() {
Expand Down
4 changes: 3 additions & 1 deletion impeller/renderer/backend/vulkan/fence_waiter_vk.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,14 @@ class FenceWaiterVK {
std::condition_variable wait_set_cv_;
WaitSet wait_set_;
bool terminate_ = false;
bool is_valid_ = false;

explicit FenceWaiterVK(std::weak_ptr<DeviceHolder> device_holder);

void Main();

bool Wait();
void WaitUntilEmpty();

FML_DISALLOW_COPY_AND_ASSIGN(FenceWaiterVK);
};

Expand Down
130 changes: 130 additions & 0 deletions impeller/renderer/backend/vulkan/fence_waiter_vk_unittests.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2013 The Flutter Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "fml/synchronization/waitable_event.h"
#include "gtest/gtest.h" // IWYU pragma: keep
#include "impeller/renderer/backend/vulkan/fence_waiter_vk.h" // IWYU pragma: keep
#include "impeller/renderer/backend/vulkan/test/mock_vulkan.h"

namespace impeller {
namespace testing {

TEST(FenceWaiterVKTest, IgnoresNullFence) {
auto const context = MockVulkanContextBuilder().Build();
auto const waiter = context->GetFenceWaiter();
EXPECT_FALSE(waiter->AddFence(vk::UniqueFence(), []() {}));
}

TEST(FenceWaiterVKTest, IgnoresNullCallback) {
auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();

auto fence = device.createFenceUnique({}).value;
EXPECT_FALSE(waiter->AddFence(std::move(fence), nullptr));
}

TEST(FenceWaiterVKTest, ExecutesFenceCallback) {
auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();

auto signal = fml::ManualResetWaitableEvent();
auto fence = device.createFenceUnique({}).value;
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });

signal.Wait();
}

TEST(FenceWaiterVKTest, ExecutesFenceCallbackX2) {
auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();

auto signal = fml::ManualResetWaitableEvent();
auto fence = device.createFenceUnique({}).value;
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });

auto signal2 = fml::ManualResetWaitableEvent();
auto fence2 = device.createFenceUnique({}).value;
waiter->AddFence(std::move(fence2), [&signal2]() { signal2.Signal(); });

signal.Wait();
signal2.Wait();
}

TEST(FenceWaiterVKTest, ExecutesNewFenceThenOldFence) {
auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();

auto signal = fml::ManualResetWaitableEvent();
auto fence = device.createFenceUnique({}).value;
MockFence::SetStatus(fence, vk::Result::eNotReady);
auto raw_fence = MockFence::GetRawPointer(fence);
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });

// The easiest way to verify that the callback was _not_ called is to wait
// for a timeout, but that could introduce flakiness. Instead, we'll add a
// second fence that will signal immediately, and wait for that one instead.
{
auto signal2 = fml::ManualResetWaitableEvent();
auto fence2 = device.createFenceUnique({}).value;
MockFence::SetStatus(fence2, vk::Result::eSuccess);
waiter->AddFence(std::move(fence2), [&signal2]() { signal2.Signal(); });
signal2.Wait();
}

// Now, we'll signal the first fence, and wait for the callback to be called.
raw_fence->SetStatus(vk::Result::eSuccess);

// Now, we'll signal the first fence, and wait for the callback to be called.
signal.Wait();
}

TEST(FenceWaiterVKTest, AddFenceDoesNothingIfTerminating) {
auto signal = fml::ManualResetWaitableEvent();

{
auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();
waiter->Terminate();

auto fence = device.createFenceUnique({}).value;
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });
}

// Ensure the fence did _not_ signal.
EXPECT_TRUE(signal.WaitWithTimeout(fml::TimeDelta::FromMilliseconds(100)));
}

TEST(FenceWaiterVKTest, InProgressFencesStillWaitIfTerminated) {
MockFence* raw_fence = nullptr;
auto signal = fml::ManualResetWaitableEvent();

auto const context = MockVulkanContextBuilder().Build();
auto const device = context->GetDevice();
auto const waiter = context->GetFenceWaiter();

// Add a fence that isn't signalled yet.
auto fence = device.createFenceUnique({}).value;

// Even if the fence is eSuccess, it's not guaranteed to be called in time.
MockFence::SetStatus(fence, vk::Result::eNotReady);
raw_fence = MockFence::GetRawPointer(fence);
waiter->AddFence(std::move(fence), [&signal]() { signal.Signal(); });

// Terminate the waiter.
waiter->Terminate();

// Signal the fence.
raw_fence->SetStatus(vk::Result::eSuccess);

// This will hang if the fence was not signalled.
signal.Wait();
}

} // namespace testing
} // namespace impeller
Loading

0 comments on commit a0055e5

Please sign in to comment.