-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add View of Views debugging tool #267
Draft
dalg24
wants to merge
9
commits into
kokkos:develop
Choose a base branch
from
dalg24:vov-bug-finder
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+283
−0
Draft
Changes from 3 commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
64ca586
Add View of Views debugging tool
dalg24 a5b4987
Remove trailing empty line
dalg24 63c2d2e
Fix typo and workaround lambda support in NVCC
dalg24 9e7a2d2
Filter out private scratch allocations and fix typo for real this time
dalg24 a8372b3
Fixup Threads backend does not label scratch pad allocs like other ho…
dalg24 497908d
Drop verbose option
dalg24 d511c6b
Enable linking against the tool
dalg24 4ab43a2
Use Christians big hammer
dalg24 2808025
Avoid unused parameter warning
dalg24 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
kp_add_library(kp_view_of_views_bug_finder kp_view_of_views_bug_finder.cpp) |
169 changes: 169 additions & 0 deletions
169
debugging/vov-bug-finder/kp_view_of_views_bug_finder.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#include <kp_core.hpp> | ||
|
||
#include <cassert> | ||
#include <cstdint> | ||
#include <cstdlib> | ||
#include <iostream> | ||
#include <map> | ||
#include <mutex> | ||
#include <optional> | ||
#include <string> | ||
|
||
namespace { | ||
|
||
bool verbose = false; | ||
bool abort_on_error = true; | ||
|
||
// Registry of the kernels that have begun but not yet ended.
//
// push() hands out ids that only ever grow and stores the label under that
// id; the map is ordered by id, so top() is the label with the smallest id
// still registered. The member functions do not lock anything themselves:
// `mutex` is exposed so that callers can serialize access to the registry.
class {
  uint64_t count_ = 0;                   // id handed out by the next push()
  std::map<uint64_t, std::string> map_;  // id -> label

 public:
  std::mutex mutex;

  // Stores label `s` and returns the id assigned to it.
  uint64_t push(std::string s) {
    map_.emplace_hint(map_.end(), count_, std::move(s));
    // Ids only grow, so the entry just added must be the last one. The check
    // has no side effect, hence behaves the same with and without NDEBUG.
    assert(map_.rbegin()->first == count_);
    return count_++;
  }

  // Removes the entry registered under id `x`. An unknown id trips the
  // assertion in debug builds and is a harmless no-op otherwise.
  void pop(uint64_t x) {
    [[maybe_unused]] auto const erased = map_.erase(x);
    assert(erased == 1);
  }

  // Label stored under the smallest id. Must not be called when empty.
  std::string const &top() const {
    assert(!map_.empty());
    return map_.begin()->second;
  }

  // True when no entry is registered.
  bool is_empty() const noexcept { return map_.empty(); }
} current;
|
||
// Returns true when `s` is exactly one of the two fence labels listed below,
// false for any other label.
bool ignore_fence(std::string_view s) {
  constexpr std::string_view view_init_destroy_fence =
      "Kokkos::Impl::ViewValueFunctor: View init/destroy fence";
  constexpr std::string_view threads_unnamed_fence =
      "Kokkos::ThreadsInternal::fence: Unnamed Instance Fence";
  if (s == view_init_destroy_fence) {
    return true;
  }
  return s == threads_unnamed_fence;
}
|
||
// Extracts the text enclosed between `prefix` and `suffix` in `str`.
//
// The first occurrence of `prefix` and the last occurrence of `suffix` are
// used, so the extracted text may itself contain `suffix` (e.g. a label with
// a ']' in it). Returns std::nullopt when `prefix` is absent or when no
// `suffix` is found at or after the end of `prefix`.
std::optional<std::string> get_substr(std::string const &str,
                                      std::string_view prefix,
                                      std::string_view suffix) {
  auto const prefix_pos = str.find(prefix);
  if (prefix_pos == std::string::npos) {
    return std::nullopt;
  }
  auto const first = prefix_pos + prefix.length();
  auto const last = str.rfind(suffix);
  // Without this check `last - first` would wrap around to a huge count and
  // substr() would silently return everything after the prefix.
  if (last == std::string::npos || last < first) {
    return std::nullopt;
  }
  return str.substr(first, last - first);
}
|
||
} // namespace | ||
|
||
extern "C" void kokkosp_request_tool_settings( | ||
const uint32_t, Kokkos_Tools_ToolSettings *settings) { | ||
settings->requires_global_fencing = false; | ||
} | ||
|
||
extern "C" void kokkosp_begin_parallel_for(char const *kernelName, | ||
uint32_t deviceID, | ||
uint64_t *kernelID) { | ||
std::lock_guard lock(current.mutex); | ||
if (!current.is_empty()) { | ||
if (auto lbl = | ||
get_substr(kernelName, "Kokkos::View::initialization [", "]")) { | ||
std::cerr << "constructing view \"" << *lbl | ||
<< "\" within a parallel region \"" << current.top() << "\"\n"; | ||
if (abort_on_error) { | ||
std::abort(); | ||
} | ||
} | ||
} | ||
*kernelID = current.push(kernelName); | ||
|
||
if (verbose) { | ||
std::cout << "begin kernel " << *kernelID << " " << kernelName | ||
<< " on device " << deviceID << '\n'; | ||
} | ||
} | ||
|
||
extern "C" void kokkosp_end_parallel_for(uint64_t kernelID) { | ||
std::lock_guard lock(current.mutex); | ||
current.pop(kernelID); | ||
|
||
if (verbose) { | ||
std::cout << "end kernel " << kernelID << '\n'; | ||
} | ||
} | ||
|
||
extern "C" void kokkosp_begin_fence(char const *fenceName, uint32_t deviceID, | ||
uint64_t *fenceID) { | ||
std::lock_guard lock(current.mutex); | ||
if (!current.is_empty() && !ignore_fence(fenceName)) { | ||
if (auto lbl = | ||
get_substr(current.top(), "Kokkos::View::destruction [", "]")) { | ||
std::cerr << "view of views \"" << *lbl | ||
<< "\" not properly cleared this fence labelled \"" << fenceName | ||
<< "\" will hang\n"; | ||
if (abort_on_error) { | ||
std::abort(); | ||
} | ||
} | ||
} | ||
*fenceID = -1; | ||
|
||
if (verbose) { | ||
std::cout << "begin fence " << *fenceID << " " << fenceName << " on device " | ||
<< deviceID << '\n'; | ||
} | ||
} | ||
|
||
extern "C" void kokkosp_end_fence(uint64_t fenceID) { | ||
if (verbose) { | ||
std::cout << "end fence " << fenceID << '\n'; | ||
} | ||
} | ||
|
||
// Callback invoked on a data allocation.
//
// If a kernel is registered at that time, reports on stderr that `name` is
// being allocated within a parallel region and aborts when `abort_on_error`
// is set. In verbose mode the allocation is also logged to stdout.
extern "C" void kokkosp_allocate_data(SpaceHandle handle, const char *name,
                                      void *ptr, uint64_t size) {
  std::lock_guard lock(current.mutex);
  if (!current.is_empty()) {
    std::cerr << "allocating \"" << name << "\" within parallel region \""
              << current.top() << "\"\n";
    if (abort_on_error) {
      std::abort();
    }
  }

  if (verbose) {
    // Leading space before "size" keeps it from running into the pointer.
    std::cout << "alloc (" << handle.name << ") " << name << " pointer " << ptr
              << " size " << size << '\n';
  }
}
|
||
// Callback invoked on a data deallocation.
//
// If a kernel is registered at that time, reports on stderr that `name` is
// being deallocated within a parallel region and aborts when
// `abort_on_error` is set. In verbose mode the deallocation is also logged
// to stdout.
extern "C" void kokkosp_deallocate_data(SpaceHandle handle, const char *name,
                                        void *ptr, uint64_t size) {
  std::lock_guard lock(current.mutex);
  if (!current.is_empty()) {
    std::cerr << "deallocating \"" << name << "\" within parallel region \""
              << current.top() << "\"\n";
    if (abort_on_error) {
      std::abort();
    }
  }

  if (verbose) {
    // Leading space before "size" keeps it from running into the pointer.
    std::cout << "dealloc (" << handle.name << ") " << name << " pointer "
              << ptr << " size " << size << '\n';
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
kp_add_executable_and_test( | ||
TARGET_NAME test_vov_bug_finder | ||
SOURCE_FILE test_view_of_views_bug_finder.cpp | ||
KOKKOS_TOOLS_LIBS kp_view_of_views_bug_finder | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#include "Kokkos_Core.hpp" | ||
#include "gtest/gtest.h" | ||
|
||
void test_no_throw_placement_new_in_parallel_for() { | ||
ASSERT_NO_THROW(({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov( | ||
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); | ||
V a("a", 4); | ||
V b("b", 5); | ||
Kokkos::parallel_for( | ||
"Fine", Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1), | ||
KOKKOS_LAMBDA(int) { | ||
new (&vov(0, 0)) V(a); | ||
new (&vov(0, 1)) V(a); | ||
new (&vov(1, 0)) V(b); | ||
}); | ||
})); | ||
} | ||
|
||
void test_death_allocation_in_parallel_for() { | ||
ASSERT_DEATH( | ||
({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov( | ||
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); | ||
V a("a", 4); | ||
new (&vov(0, 0)) V(a); | ||
new (&vov(0, 1)) V(a); | ||
Kokkos::parallel_for( | ||
"AllocatesInParallel]For", | ||
Kokkos::RangePolicy<Kokkos::DefaultHostExecutionSpace>(0, 1), | ||
KOKKOS_LAMBDA(int) { | ||
V b("b", 5); | ||
new (&vov(1, 0)) V(b); | ||
}); | ||
}), | ||
"allocating \"b\" within parallel region \"AllocatesInParallel]For\""); | ||
} | ||
|
||
// TODO initialize in main and split unit tests | ||
TEST(ViewOfViews, find_bugs) { | ||
Kokkos::initialize(); | ||
{ | ||
ASSERT_NO_THROW(({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov("vov", 2, 3); | ||
V a("a", 4); | ||
V b("b", 5); | ||
vov(0, 0) = a; | ||
vov(0, 1) = a; | ||
vov(1, 0) = b; | ||
|
||
vov(0, 0) = V(); | ||
vov(0, 1) = V(); | ||
vov(1, 0) = V(); | ||
})); | ||
|
||
ASSERT_NO_THROW(({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov( | ||
Kokkos::view_alloc("vov", Kokkos::WithoutInitializing), 2, 3); | ||
V a("a", 4); | ||
V b("b", 5); | ||
new (&vov(0, 0)) V(a); | ||
new (&vov(0, 1)) V(a); | ||
new (&vov(1, 0)) V(b); | ||
|
||
vov(0, 0).~V(); | ||
vov(0, 1).~V(); | ||
// vov(1, 0).~V(); | ||
// ^ leaking "b" but not caught by the tool | ||
})); | ||
|
||
ASSERT_DEATH(({ | ||
using V = Kokkos::View<int *>; | ||
Kokkos::View<V **, Kokkos::HostSpace> vov("vo]v", 2, 3); | ||
// ^ included a closing square bracket in the label to try | ||
// to trip the substring extraction | ||
V a("a", 4); | ||
V b("b", 5); | ||
vov(0, 0) = a; | ||
vov(0, 1) = a; | ||
vov(1, 0) = b; | ||
}), | ||
"view of views \"vo]v\" not properly cleared"); | ||
Comment on lines
+107
to
+118
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just noting that in my OpenMP build, I see this death test hang when I run with more than one OpenMP thread. (Christian had noted that team tests sometimes hang, but I'm not sure if it's related.) |
||
|
||
test_no_throw_placement_new_in_parallel_for(); | ||
|
||
test_death_allocation_in_parallel_for(); | ||
} | ||
Kokkos::finalize(); | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In a Cuda build, the compiler is complaining about this line. The reason seems to be that we end up in a constructor that isn't marked with Kokkos markup. One option would be to deactivate this part of the test in builds with a device backend. I'm not sure whether there's a better solution to this problem.