-
Notifications
You must be signed in to change notification settings - Fork 12.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[ASan][libc++] Turn on ASan annotations for short strings #75882
[ASan][libc++] Turn on ASan annotations for short strings #75882
Conversation
@llvm/pr-subscribers-libcxx Author: Tacet (AdvenamTacet) ChangesThis commit turns on ASan annotations in Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Annotating Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the If you have any questions, please email:
Full diff: https://github.com/llvm/llvm-project/pull/75882.diff 5 Files Affected:
diff --git a/libcxx/include/string b/libcxx/include/string
index fdffca5aed18be..0be2d4ca8b212b 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -659,7 +659,6 @@ _LIBCPP_PUSH_MACROS
#else
# define _LIBCPP_STRING_INTERNAL_MEMORY_ACCESS
#endif
-#define _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED false
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -1899,7 +1898,7 @@ private:
// ASan: short string is poisoned if and only if this function returns true.
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __asan_short_string_is_annotated() const _NOEXCEPT {
- return _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED && !__libcpp_is_constant_evaluated();
+ return !__libcpp_is_constant_evaluated();
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __annotate_new(size_type __current_size) const _NOEXCEPT {
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
new file mode 100644
index 00000000000000..95a025ced7eab8
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
@@ -0,0 +1,180 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+#include <cassert>
+#include <array>
+#include "test_macros.h"
+#include "asan_testing.h" // includes deque and string - don't do it before
+
+// This tests exists to check if strings work well with deque, as those
+// may be partialy annotated, we cannot simply call
+// is_double_ended_contiguous_container_asan_correct, as it assumes that
+// object memory inside is not annotated, so we check everything in a more careful way.
+
+template <typename D>
+bool verify_inside(D const& d) {
+ for (size_t i = 0; i < d.size(); ++i) {
+ if (!is_string_asan_correct(d[i]))
+ return false;
+ }
+
+ return true;
+}
+
+template <typename S, size_t N>
+S get_s(char c) {
+ S s;
+ for (size_t i = 0; i < N; ++i)
+ s.push_back(c);
+
+ return s;
+}
+
+template <class C, class S>
+void test_string() {
+ size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
+
+ {
+ C d1a(1), d1b(N), d1c(N + 1), d1d(32 * N);
+ assert(verify_inside(d1a));
+ assert(verify_inside(d1b));
+ assert(verify_inside(d1c));
+ assert(verify_inside(d1d));
+ }
+ {
+ C d2;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ d2.push_back(get_s<S, 1>(i % 10 + 'a'));
+ assert(verify_inside(d2));
+ d2.push_back(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d2));
+
+ d2.pop_front();
+ assert(verify_inside(d2));
+ }
+ }
+ {
+ C d3;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ d3.push_front(get_s<S, 1>(i % 10 + 'a'));
+ assert(verify_inside(d3));
+ d3.push_front(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d3));
+
+ d3.pop_back();
+ assert(verify_inside(d3));
+ }
+ }
+ {
+ C d4;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ // When there is no SSO, all elements inside should not be poisoned,
+ // so we can verify deque poisoning.
+ d4.push_front(get_s<S, 333>(i % 10 + 'a'));
+ assert(verify_inside(d4));
+ assert(is_double_ended_contiguous_container_asan_correct(d4));
+ d4.push_back(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d4));
+ assert(is_double_ended_contiguous_container_asan_correct(d4));
+ }
+ }
+ {
+ C d5;
+ for (size_t i = 0; i < 5 * N; ++i) {
+ // In d4 we never had poisoned memory inside deque.
+ // Here we start with SSO, so part of the inside of the container,
+ // will be poisoned.
+ d5.push_front(S());
+ assert(verify_inside(d5));
+ }
+ for (size_t i = 0; i < d5.size(); ++i) {
+ // We change the size to have long string.
+ // Memory owne by deque should not be poisoned by string.
+ d5[i].resize(1000);
+ assert(verify_inside(d5));
+ }
+
+ assert(is_double_ended_contiguous_container_asan_correct(d5));
+
+ d5.erase(d5.begin() + 2);
+ assert(verify_inside(d5));
+
+ d5.erase(d5.end() - 2);
+ assert(verify_inside(d5));
+
+ assert(is_double_ended_contiguous_container_asan_correct(d5));
+ }
+ {
+ C d6a;
+ assert(is_double_ended_contiguous_container_asan_correct(d6a));
+
+ C d6b(N + 2, get_s<S, 1000>('a'));
+ d6b.push_front(get_s<S, 1001>('b'));
+ while (!d6b.empty()) {
+ d6b.pop_back();
+ assert(is_double_ended_contiguous_container_asan_correct(d6b));
+ }
+
+ C d6c(N + 2, get_s<S, 1002>('c'));
+ while (!d6c.empty()) {
+ d6c.pop_back();
+ assert(is_double_ended_contiguous_container_asan_correct(d6c));
+ }
+ }
+ {
+ C d7(9 * N + 2);
+
+ d7.insert(d7.begin() + 1, S());
+ assert(verify_inside(d7));
+
+ d7.insert(d7.end() - 3, S());
+ assert(verify_inside(d7));
+
+ d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
+ assert(verify_inside(d7));
+
+ d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
+ assert(verify_inside(d7));
+
+ d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
+ assert(verify_inside(d7));
+
+ // It may not be short for big element types, but it will be checked correctly:
+ d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
+ assert(verify_inside(d7));
+
+ d7.erase(d7.begin() + 2);
+ assert(verify_inside(d7));
+
+ d7.erase(d7.end() - 2);
+ assert(verify_inside(d7));
+ }
+}
+
+template<class S>
+void test_container() {
+ test_string<std::deque<S, std::allocator<S>>, S>();
+ test_string<std::deque<S, min_allocator<S>>, S>();
+ test_string<std::deque<S, safe_allocator<S>>, S>();
+}
+
+int main(int, char**) {
+ // Those tests support only types based on std::basic_string.
+ test_container<std::string>();
+ test_container<std::wstring>();
+#if TEST_STD_VER >= 11
+ test_container<std::u16string>();
+ test_container<std::u32string>();
+#endif
+#if TEST_STD_VER >= 20
+ test_container<std::u8string>();
+#endif
+}
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
new file mode 100644
index 00000000000000..6c692b38c35d43
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_short.pass.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+// <string>
+
+// Basic test if ASan annotations work for short strings.
+
+#include <string>
+#include <cassert>
+#include <cstdlib>
+
+#include "asan_testing.h"
+#include "min_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+
+extern "C" void __sanitizer_set_death_callback(void (*callback)(void));
+
+void do_exit() { exit(0); }
+
+int main(int, char**) {
+ {
+ typedef cpp17_input_iterator<char*> MyInputIter;
+ // Should not trigger ASan.
+ std::basic_string<char, std::char_traits<char>, safe_allocator<char>> v;
+ char i[] = {'a', 'b', 'c', 'd'};
+
+ v.insert(v.begin(), MyInputIter(i), MyInputIter(i + 4));
+ assert(v[0] == 'a');
+ assert(is_string_asan_correct(v));
+ }
+
+ __sanitizer_set_death_callback(do_exit);
+ {
+ using T = char;
+ using C = std::basic_string<T, std::char_traits<T>, safe_allocator<T>>;
+ const T t[] = {'a', 'b', 'c', 'd', 'e', 'f', 'g'};
+ C c(std::begin(t), std::end(t));
+ assert(is_string_asan_correct(c));
+ assert(__sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != 0);
+ volatile T foo = c[c.size() + 1]; // should trigger ASAN. Use volatile to prevent being optimized away.
+ assert(false); // if we got here, ASAN didn't trigger
+ ((void)foo);
+ }
+}
diff --git a/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp b/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
new file mode 100644
index 00000000000000..a3a33ea84ca103
--- /dev/null
+++ b/libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
@@ -0,0 +1,180 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: asan
+// UNSUPPORTED: c++03
+
+#include <cassert>
+#include <array>
+#include "test_macros.h"
+#include "asan_testing.h" // includes vector and string - don't do it before
+
+// This tests exists to check if strings work well with vector, as those
+// may be partialy annotated, we cannot simply call
+// is_contiguous_container_asan_correct, as it assumes that
+// object memory inside is not annotated, so we check everything in a more careful way.
+
+template <typename D>
+bool verify_inside(D const& d) {
+ for (size_t i = 0; i < d.size(); ++i) {
+ if (!is_string_asan_correct(d[i]))
+ return false;
+ }
+
+ return true;
+}
+
+template <typename S, size_t N>
+S get_s(char c) {
+ S s;
+ for (size_t i = 0; i < N; ++i)
+ s.push_back(c);
+
+ return s;
+}
+
+template <class C, class S>
+void test_string() {
+ size_t const N = sizeof(S) < 256 ? (4096 / sizeof(S)) : 16;
+
+ {
+ C d1a(1), d1b(N), d1c(N + 1), d1d(32 * N);
+ assert(verify_inside(d1a));
+ assert(verify_inside(d1b));
+ assert(verify_inside(d1c));
+ assert(verify_inside(d1d));
+ }
+ {
+ C d2;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ d2.push_back(get_s<S, 1>(i % 10 + 'a'));
+ assert(verify_inside(d2));
+ d2.push_back(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d2));
+
+ d2.erase(d2.cbegin());
+ assert(verify_inside(d2));
+ }
+ }
+ {
+ C d3;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ d3.push_back(get_s<S, 1>(i % 10 + 'a'));
+ assert(verify_inside(d3));
+ d3.push_back(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d3));
+
+ d3.pop_back();
+ assert(verify_inside(d3));
+ }
+ }
+ {
+ C d4;
+ for (size_t i = 0; i < 16 * N; ++i) {
+ // When there is no SSO, all elements inside should not be poisoned,
+ // so we can verify vector poisoning.
+ d4.push_back(get_s<S, 333>(i % 10 + 'a'));
+ assert(verify_inside(d4));
+ assert(is_contiguous_container_asan_correct(d4));
+ d4.push_back(get_s<S, 222>(i % 10 + 'b'));
+ assert(verify_inside(d4));
+ assert(is_contiguous_container_asan_correct(d4));
+ }
+ }
+ {
+ C d5;
+ for (size_t i = 0; i < 5 * N; ++i) {
+ // In d4 we never had poisoned memory inside vector.
+ // Here we start with SSO, so part of the inside of the container,
+ // will be poisoned.
+ d5.push_back(S());
+ assert(verify_inside(d5));
+ }
+ for (size_t i = 0; i < d5.size(); ++i) {
+ // We change the size to have long string.
+ // Memory owne by vector should not be poisoned by string.
+ d5[i].resize(1000);
+ assert(verify_inside(d5));
+ }
+
+ assert(is_contiguous_container_asan_correct(d5));
+
+ d5.erase(d5.begin() + 2);
+ assert(verify_inside(d5));
+
+ d5.erase(d5.end() - 2);
+ assert(verify_inside(d5));
+
+ assert(is_contiguous_container_asan_correct(d5));
+ }
+ {
+ C d6a;
+ assert(is_contiguous_container_asan_correct(d6a));
+
+ C d6b(N + 2, get_s<S, 1000>('a'));
+ d6b.push_back(get_s<S, 1001>('b'));
+ while (!d6b.empty()) {
+ d6b.pop_back();
+ assert(is_contiguous_container_asan_correct(d6b));
+ }
+
+ C d6c(N + 2, get_s<S, 1002>('c'));
+ while (!d6c.empty()) {
+ d6c.pop_back();
+ assert(is_contiguous_container_asan_correct(d6c));
+ }
+ }
+ {
+ C d7(9 * N + 2);
+
+ d7.insert(d7.begin() + 1, S());
+ assert(verify_inside(d7));
+
+ d7.insert(d7.end() - 3, S());
+ assert(verify_inside(d7));
+
+ d7.insert(d7.begin() + 2 * N, get_s<S, 1>('a'));
+ assert(verify_inside(d7));
+
+ d7.insert(d7.end() - 2 * N, get_s<S, 1>('b'));
+ assert(verify_inside(d7));
+
+ d7.insert(d7.begin() + 2 * N, 3 * N, get_s<S, 1>('c'));
+ assert(verify_inside(d7));
+
+ // It may not be short for big element types, but it will be checked correctly:
+ d7.insert(d7.end() - 2 * N, 3 * N, get_s<S, 2>('d'));
+ assert(verify_inside(d7));
+
+ d7.erase(d7.begin() + 2);
+ assert(verify_inside(d7));
+
+ d7.erase(d7.end() - 2);
+ assert(verify_inside(d7));
+ }
+}
+
+template<class S>
+void test_container() {
+ test_string<std::vector<S, std::allocator<S>>, S>();
+ test_string<std::vector<S, min_allocator<S>>, S>();
+ test_string<std::vector<S, safe_allocator<S>>, S>();
+}
+
+int main(int, char**) {
+ // Those tests support only types based on std::basic_string.
+ test_container<std::string>();
+ test_container<std::wstring>();
+#if TEST_STD_VER >= 11
+ test_container<std::u16string>();
+ test_container<std::u32string>();
+#endif
+#if TEST_STD_VER >= 20
+ test_container<std::u8string>();
+#endif
+}
diff --git a/libcxx/test/support/asan_testing.h b/libcxx/test/support/asan_testing.h
index 2dfec5c42b00b2..23545ff0d0c686 100644
--- a/libcxx/test/support/asan_testing.h
+++ b/libcxx/test/support/asan_testing.h
@@ -74,17 +74,10 @@ TEST_CONSTEXPR bool is_string_asan_correct(const std::basic_string<ChrT, TraitsT
if (TEST_IS_CONSTANT_EVALUATED)
return true;
- if (!is_string_short(c) || _LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED) {
- if (std::is_same<Alloc, std::allocator<ChrT>>::value)
- return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) !=
- 0;
- else
- return __sanitizer_verify_contiguous_container(
- c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
- } else {
- return __sanitizer_verify_contiguous_container(std::addressof(c), std::addressof(c) + 1, std::addressof(c) + 1) !=
- 0;
- }
+ if (std::is_same<Alloc, std::allocator<ChrT>>::value)
+ return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.size() + 1, c.data() + c.capacity() + 1) != 0;
+ else
+ return __sanitizer_verify_contiguous_container(c.data(), c.data() + c.capacity() + 1, c.data() + c.capacity() + 1) != 0;
}
#else
# include <string>
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
b67e891
to
bd8bc4e
Compare
Based on https://github.com/llvm/llvm-project/pull/75845/files#r1431877764, I'm going to look at this test case and understand what is happening there for sure: //===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// <string>
#include <string>
#include <cassert>
#include "asan_testing.h"
constexpr std::string s = "abcdef";
// launder 's' through a non-constexpr function
std::string const& hide() { return s; }
/* not constinit */
std::string s2 = hide();
int main() {
//LIBCPP_ASSERT(is_string_asan_correct(s));
LIBCPP_ASSERT(is_string_asan_correct(s2));
//LIBCPP_ASSERT(is_string_asan_correct(hide()));
assert(s == "abcdef");
assert(s2 == "abcdef");
assert(hide() == "abcdef");
} This shouldn't affect other PRs. |
This function replaces a call to `__move_assign` (internal function) with two calls to public member functions (`resize` and `erase`). The order of calls is chosen for the best performance. This change is required to [turn on ASan string annotations for short strings](#75882) (Short String Optimization - SSO). The `std::basic_string` class's `void __move_assign(basic_string&& __str, size_type __pos, size_type __len)` function operates on uninitialized strings, where it is reasonable to assume that the memory is not poisoned. However, in `sstream` this function is applied to existing strings that already have poisoned memory. String ASan annotations turned on here: #72677
bd8bc4e
to
8f71de6
Compare
8eb2478
to
9441b9d
Compare
libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
Show resolved
Hide resolved
libcxx/test/libcxx/containers/strings/basic.string/asan_vector_integration.pass.cpp
Outdated
Show resolved
Hide resolved
libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
Outdated
Show resolved
Hide resolved
libcxx/test/libcxx/containers/strings/basic.string/asan_deque_integration.pass.cpp
Outdated
Show resolved
Hide resolved
This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
I'm merging this PR while buildkite tests are still running (Windows and FreeBSD), I expect them to pass because before rebase those two tests passed. I'm monitoring their status to act, if a bug is detected. ARM tests are failing, but those are unrelated and appear in different builds:
@ldionne thx for reviewing my PRs and help! |
Buildbots have related failures: https://lab.llvm.org/buildbot/#/builders/168/builds/18126 Edit: It looks like a detected bug and not a bug introduced by this PR, but I'm still looking at it to confirm. |
This commit lowers values in `std::vector` tests to as good as previous ones, but faster. One tests caused a problem with buildbots: https://lab.llvm.org/buildbot/#/builders/168/builds/18126/steps/11/logs/stdio Test added here: llvm#75882
This also looks confusing https://lab.llvm.org/buildbot/#/builders/239/builds/5361 |
That one is confusing, seems to be slightly different than previous ones.
This line was problematic earlier. But I'm not sure if it's related and it's not just a coincidence. if(!__s.__is_long()) __s.__annotate_delete(); Memory should be unpoisoned, if the string is short, but shadow memory looks like it's not. What allocator is used here? |
This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Requires to pass CI without fails: - llvm#75845 - llvm#75858 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
This commit lowers values in `std::vector` tests to as good as previous ones, but faster. One tests caused a problem with buildbots: https://lab.llvm.org/buildbot/#/builders/168/builds/18126/steps/11/logs/stdio Test added here: llvm#75882
Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp #ifndef NDEBUG // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); #endif ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Requires to pass CI without fails: - #75845 - #75858 Annotating `std::basic_string` with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp #ifndef NDEBUG // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); #endif ```
This is 3rd attempt to upstream short string annotations, it's the same as the previous one, but other PRs fixed issues withing LLVM: - llvm#79489 - llvm#79522 Additionaly annotations were updated (but it shouldn't have any impact on anything): - llvm#79292 Now, as far as I know, all buildbots should work without problems. Both previous reverts were not related to issues with string annotations, but with issues in LLVM/clang. Read PRs above and below for details. --- Previous description: Originally merged here: llvm#75882 Reverted here: llvm#78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - llvm#79065 - llvm#79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Requires to pass CI without fails: - llvm#75845 - llvm#75858 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ```
) This function replaces a call to `__move_assign` (internal function) with two calls to public member functions (`resize` and `erase`). The order of calls is chosen for the best performance. This change is required to [turn on ASan string annotations for short strings](llvm#75882) (Short String Optimization - SSO). The `std::basic_string` class's `void __move_assign(basic_string&& __str, size_type __pos, size_type __len)` function operates on uninitialized strings, where it is reasonable to assume that the memory is not poisoned. However, in `sstream` this function is applied to existing strings that already have poisoned memory. String ASan annotations turned on here: llvm#72677
This function replaces a call to `__move_assign` (internal function) with two calls to public member functions (`resize` and `erase`). The order of calls is chosen for the best performance. This change is required to [turn on ASan string annotations for short strings](llvm/llvm-project#75882) (Short String Optimization - SSO). The `std::basic_string` class's `void __move_assign(basic_string&& __str, size_type __pos, size_type __len)` function operates on uninitialized strings, where it is reasonable to assume that the memory is not poisoned. However, in `sstream` this function is applied to existing strings that already have poisoned memory. String ASan annotations turned on here: llvm/llvm-project#72677 NOKEYCHECK=True GitOrigin-RevId: 5351ded68d579921a61b26a34e36046c22f668bd
…78627) Reverts llvm/llvm-project#75882 To recover build bots : https://lab.llvm.org/buildbot/#/builders/239/builds/5361 https://lab.llvm.org/buildbot/#/builders/168/builds/18126 NOKEYCHECK=True GitOrigin-RevId: 2a54098de1af2e0d0d7e63d79e8eae8e8d78db92
Originally merged here: llvm/llvm-project#75882 Reverted here: llvm/llvm-project#78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - llvm/llvm-project#79065 - llvm/llvm-project#79066 Problematic code from `UniqueFunctionBase` for example: ```cpp #ifndef NDEBUG // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); #endif ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm/llvm-project#72677 Requires to pass CI without fails: - llvm/llvm-project#75845 - llvm/llvm-project#75858 Annotating `std::basic_string` with default allocator is implemented in llvm/llvm-project#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm/llvm-project@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm/llvm-project@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected] NOKEYCHECK=True GitOrigin-RevId: cb528ec5e6331ce207c7b835d7ab963bd5e13af7
This is 3rd attempt to upstream short string annotations, it's the same as the previous one, but other PRs fixed issues withing LLVM: - llvm#79489 - llvm#79522 Additionaly annotations were updated (but it shouldn't have any impact on anything): - llvm#79292 Now, as far as I know, all buildbots should work without problems. Both previous reverts were not related to issues with string annotations, but with issues in LLVM/clang. Read PRs above and below for details. --- Previous description: Originally merged here: llvm#75882 Reverted here: llvm#78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - llvm#79065 - llvm#79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Requires to pass CI without fails: - llvm#75845 - llvm#75858 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
This is 3rd attempt to upstream short string annotations, it's the same as the previous one, but other PRs fixed issues withing LLVM: - llvm#79489 - llvm#79522 Additionaly annotations were updated (but it shouldn't have any impact on anything): - llvm#79292 Now, as far as I know, all buildbots should work without problems. Both previous reverts were not related to issues with string annotations, but with issues in LLVM/clang. Read PRs above and below for details. --- Previous description: Originally merged here: llvm#75882 Reverted here: llvm#78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - llvm#79065 - llvm#79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Requires to pass CI without fails: - llvm#75845 - llvm#75858 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
This is 3rd attempt to upstream short string annotations, it's the same as the previous one, but other PRs fixed issues withing LLVM: - llvm#79489 - llvm#79522 Additionaly annotations were updated (but it shouldn't have any impact on anything): - llvm#79292 Now, as far as I know, all buildbots should work without problems. Both previous reverts were not related to issues with string annotations, but with issues in LLVM/clang. Read PRs above and below for details. --- Previous description: Originally merged here: llvm#75882 Reverted here: llvm#78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - llvm#79065 - llvm#79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: llvm#72677 Requires to pass CI without fails: - llvm#75845 - llvm#75858 Annotating `std::basic_string` with default allocator is implemented in llvm#72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since llvm@dd1b7b7. You can turn off annotations for a specific allocator based on changes from llvm@2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: [email protected] [email protected]
This pull request is the third iteration aiming to integrate short string annotations. This commit includes: - Enabling basic_string annotations for short strings. - Setting a value of `__trivially_relocatable` in `std::basic_string` to `false_type` when compiling with ASan (nothing changes when compiling without ASan). Short string annotations make `std::basic_string` to not be trivially relocatable, because memory has to be unpoisoned. - Adding a `_LIBCPP_STRING_INTERNAL_MEMORY_ACCESS` modifier to two functions. - Creating a macro `_LIBCPP_ASAN_VOLATILE_WRAPPER` to prevent problematic stack optimizations (the macro modifies code behavior only when compiling with ASan). Previously we had issues with compiler optimization, which we understand thanks to @vitalybuka. This commit also addresses smaller changes in short string, since previous upstream attempts. Problematic optimization was loading two values in code similar to: ``` __is_long() ? __get_long_size() : __get_short_size(); ``` We aim to resolve it with the volatile wrapper. This commit is built on top of two previous attempts which descriptions are below. Additionally, in the meantime, annotations were updated (but it shouldn't have any impact on anything): - #79292 --- Previous PR: #79049 Reverted: a16f81f Previous description: Originally merged here: #75882 Reverted here: #78627 Reverted due to failing buildbots. The problem was not caused by the annotations code, but by code in the `UniqueFunctionBase` class and in the `JSON.h` file. That code caused the program to write to memory that was already being used by string objects, which resulted in an ASan error. Fixes are implemented in: - #79065 - #79066 Problematic code from `UniqueFunctionBase` for example: ```cpp // In debug builds, we also scribble across the rest of the storage. memset(RHS.getInlineStorage(), 0xAD, InlineStorageSize); ``` --- Original description: This commit turns on ASan annotations in `std::basic_string` for short stings (SSO case). Originally suggested here: https://reviews.llvm.org/D147680 String annotations added here: #72677 Requires to pass CI without fails: - #75845 - #75858 Annotating `std::basic_string` with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes `_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED`, because we do not plan to support turning on and off short string annotations. Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec. This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in `std::vector` and `std::deque` collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements in `std::deque`, or between the `std::basic_string`'s size (including the null terminator) and capacity bounds. The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the `std::equals` function. This function was taking iterators (`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison, using a custom comparison function. When the `iter1` object exceeded the length of `iter2`, an out-of-bounds read could occur on the `iter2` object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability. If you have any questions, please email: - [email protected] - [email protected]
This commit turns on ASan annotations in
std::basic_string
for short stings (SSO case).Originally suggested here: https://reviews.llvm.org/D147680
String annotations added here: #72677
Requires to pass CI without fails:
std::basic_string
with all allocators #75845Annotating
std::basic_string
with default allocator is implemented in #72677 but annotations for short strings (SSO - Short String Optimization) are turned off there. This commit turns them on. This also removes_LIBCPP_SHORT_STRING_ANNOTATIONS_ALLOWED
, because we do not plan to support turning on and off short string annotations.Support in ASan API exists since dd1b7b7. You can turn off annotations for a specific allocator based on changes from 2fa1bec.
This PR is a part of a series of patches extending AddressSanitizer C++ container overflow detection capabilities by adding annotations, similar to those existing in
std::vector
andstd::deque
collections. These enhancements empower ASan to effectively detect instances where the instrumented program attempts to access memory within a collection's internal allocation that remains unused. This includes cases where access occurs before or after the stored elements instd::deque
, or between thestd::basic_string
's size (including the null terminator) and capacity bounds.The introduction of these annotations was spurred by a real-world software bug discovered by Trail of Bits, involving an out-of-bounds memory access during the comparison of two strings using the
std::equals
function. This function was taking iterators (iter1_begin
,iter1_end
,iter2_begin
) to perform the comparison, using a custom comparison function. When theiter1
object exceeded the length ofiter2
, an out-of-bounds read could occur on theiter2
object. Container sanitization, upon enabling these annotations, would effectively identify and flag this potential vulnerability.If you have any questions, please email: