From 4dc8a717313df5bdebd61d2791a01bacfbc436bd Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Wed, 27 Dec 2023 12:28:42 +1000 Subject: [PATCH 1/4] If we fail to calculate the buffer size (due to overflow) we currently return a nullptr. This is inconsistent as an actual memory allocation failure throws. An overflow would typically be due to bad input so an exception makes more sense given that. Change to throw so code using MakeUniquePtr* and AllocArray* doesn't need to check for nullptr. Add some extra info to the log message to help debugging. Should help with #18905 by avoiding the invalid attempted usage of a nullptr from the allocation. Extra info _might_ help with figuring out where the overflow is coming from which is the real issue. --- .../onnxruntime/core/framework/allocator.h | 46 ++++++++++++------- onnxruntime/core/framework/allocator.cc | 2 +- 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index cbc2208b6bbd7..eff4e1d293a0f 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -8,6 +8,7 @@ #include "core/session/onnxruntime_c_api.h" #include "ortdevice.h" #include "ortmemoryinfo.h" +#include #include // This configures the arena based allocator used by ORT @@ -100,7 +101,8 @@ class IAllocator { * \param out Total size required after any alignment is applied * \return true, successful. false, overflow */ - [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, size_t* out) noexcept; + [[nodiscard]] static bool CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, size_t alignment, + size_t* out) noexcept; /** * https://cwe.mitre.org/data/definitions/190.html @@ -120,8 +122,10 @@ class IAllocator { */ void* AllocArray(size_t nmemb, size_t size) { size_t len; - if (!CalcMemSizeForArray(nmemb, size, &len)) - return nullptr; + if (!CalcMemSizeForArray(nmemb, size, &len)) { + ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size); + } + return Alloc(len); } @@ -131,8 +135,10 @@ class IAllocator { template void* AllocArrayWithAlignment(size_t nmemb, size_t size) { size_t len; - if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len)) - return nullptr; + if (!CalcMemSizeForArrayWithAlignment(nmemb, size, alignment, &len)) { + ORT_THROW("Invalid size requested for allocation: ", nmemb, " * ", size, " with alignment ", alignment); + } + return Alloc(len); } @@ -150,7 +156,8 @@ class IAllocator { static IAllocatorUniquePtr MakeUniquePtr(std::shared_ptr allocator, size_t count_or_bytes, bool use_reserve = false, Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) { - if (allocator == nullptr) return nullptr; + assert(allocator != nullptr); + // for now limit to fundamental types. we could support others, but to do so either we or the caller // needs to call the dtor for the objects, for buffers allocated on device we don't have destructor // static_assert(std::is_fundamental::value, "Fundamental type required as no destructors are called."); @@ -161,35 +168,40 @@ class IAllocator { if constexpr (!std::is_void::value) { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call - if (!CalcMemSizeForArray( - count_or_bytes, sizeof(typename std::conditional::value, void*, T>::type), &alloc_size)) { - return nullptr; + const auto size = sizeof(typename std::conditional::value, void*, T>::type); + if (!CalcMemSizeForArray(count_or_bytes, size, &alloc_size)) { + ORT_THROW("Invalid size requested for allocation: ", count_or_bytes, " * ", size); } } // allocate T* p = static_cast(AllocateBufferWithOptions(*allocator, alloc_size, use_reserve, stream, std::move(wait_fn))); - return IAllocatorUniquePtr{ - p, - [allocator = std::move(allocator)](T* p) { allocator->Free(p); }}; + return IAllocatorUniquePtr{p, + [allocator = std::move(allocator)](T* p) { + allocator->Free(p); + }}; } template static IAllocatorUniquePtr MakeUniquePtrFromOrtAllocator(OrtAllocator* ort_allocator, size_t count_or_bytes) { - if (!ort_allocator) return nullptr; + assert(ort_allocator != nullptr); size_t alloc_size = count_or_bytes; // if T is not void, 'count_or_bytes' == number of items so allow for that if constexpr (!std::is_void::value) { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call - if (!CalcMemSizeForArray( - count_or_bytes, sizeof(typename std::conditional::value, void*, T>::type), &alloc_size)) { - return nullptr; + const auto size = sizeof(typename std::conditional::value, void*, T>::type); + if (!CalcMemSizeForArray(count_or_bytes, size, &alloc_size)) { + ORT_THROW("Invalid size requested for allocation: ", count_or_bytes, " * ", size); } } + T* p = static_cast(ort_allocator->Alloc(ort_allocator, count_or_bytes)); - return IAllocatorUniquePtr{p, [ort_allocator](T* p) { ort_allocator->Free(ort_allocator, p); }}; + return IAllocatorUniquePtr{p, + [ort_allocator](T* p) { + ort_allocator->Free(ort_allocator, p); + }}; } private: diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc index 2499ead9effbd..c3e96e450c59b 100644 --- a/onnxruntime/core/framework/allocator.cc +++ b/onnxruntime/core/framework/allocator.cc @@ -33,7 +33,7 @@ bool IAllocator::CalcMemSizeForArrayWithAlignment(size_t nmemb, size_t size, siz ORT_CATCH(const OnnxRuntimeException& ex) { // overflow in calculating the size thrown by SafeInt. ORT_HANDLE_EXCEPTION([&]() { - LOGS_DEFAULT(ERROR) << ex.what(); + LOGS_DEFAULT(ERROR) << ex.what() << " nmemb=" << nmemb << " size=" << size << " alignment=" << alignment; ok = false; }); } From 5d0bce4f32546607ee17671de7bf700f16ce398a Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Thu, 28 Dec 2023 09:05:08 +1000 Subject: [PATCH 2/4] Use enforce for allocator and split out to avoid binary size impact. Cleanup the 2 places that were checking for a nullptr return and update the documentation to say the methods throw if they cannot allocate memory. --- .../onnxruntime/core/framework/allocator.h | 35 ++++++++++++++----- .../cpu/quantization/matmul_nbits.cc | 3 -- onnxruntime/core/framework/sparse_tensor.cc | 3 +- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index eff4e1d293a0f..1aa7bcb7dd501 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -150,13 +150,13 @@ class IAllocator { @param stream Which stream instance allocated chunk will be used with. @param wait_fn If the allocator want to dynamic reuse a chunk from another stream, use this wait_fn to sync on the target stream to make the reuse safe. - @returns std::unique_ptr with allocated memory and deleter. + @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory. */ template static IAllocatorUniquePtr MakeUniquePtr(std::shared_ptr allocator, size_t count_or_bytes, bool use_reserve = false, Stream* stream = nullptr, WaitNotificationFn wait_fn = nullptr) { - assert(allocator != nullptr); + ValidateAllocator(allocator); // for now limit to fundamental types. we could support others, but to do so either we or the caller // needs to call the dtor for the objects, for buffers allocated on device we don't have destructor @@ -169,9 +169,7 @@ class IAllocator { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call const auto size = sizeof(typename std::conditional::value, void*, T>::type); - if (!CalcMemSizeForArray(count_or_bytes, size, &alloc_size)) { - ORT_THROW("Invalid size requested for allocation: ", count_or_bytes, " * ", size); - } + alloc_size = CheckedCalcMemSizeForArray(count_or_bytes, size); } // allocate @@ -182,9 +180,15 @@ class IAllocator { }}; } + /** + Create a std::unique_ptr that is allocated and freed by the provided OrtAllocator. + @param ort_allocator The allocator. + @param count_or_bytes The exact bytes to allocate if T is void, otherwise the number of elements to allocate. + @returns std::unique_ptr with allocated memory and deleter. Throws if it cannot allocate memory. + */ template static IAllocatorUniquePtr MakeUniquePtrFromOrtAllocator(OrtAllocator* ort_allocator, size_t count_or_bytes) { - assert(ort_allocator != nullptr); + ValidateAllocator(ort_allocator); size_t alloc_size = count_or_bytes; // if T is not void, 'count_or_bytes' == number of items so allow for that @@ -192,9 +196,7 @@ class IAllocator { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call const auto size = sizeof(typename std::conditional::value, void*, T>::type); - if (!CalcMemSizeForArray(count_or_bytes, size, &alloc_size)) { - ORT_THROW("Invalid size requested for allocation: ", count_or_bytes, " * ", size); - } + alloc_size = CheckedCalcMemSizeForArray(count_or_bytes, size); } T* p = static_cast(ort_allocator->Alloc(ort_allocator, count_or_bytes)); @@ -205,6 +207,21 @@ class IAllocator { } private: + // validation functions. split out from methods that are templatized on the data type to minimize binary size. + template + static void ValidateAllocator(const T& allocator) { + ORT_ENFORCE(allocator != nullptr); + } + + static size_t CheckedCalcMemSizeForArray(size_t count, size_t size) { + size_t alloc_size = 0; + if (!CalcMemSizeForArray(count, size, &alloc_size)) { + ORT_THROW("Invalid size requested for allocation: ", count, " * ", size); + } + + return alloc_size; + } + OrtMemoryInfo memory_info_; }; diff --git a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc index b060d500c6484..a9703dc68dd26 100644 --- a/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/cpu/quantization/matmul_nbits.cc @@ -71,9 +71,6 @@ Status MatMulNBits::PrePack(const Tensor& tensor, int input_idx, /*out*/ Allocat if (packed_b_size_ == 0) return Status::OK(); auto qptr = tensor.Data(); packed_b_ = IAllocator::MakeUniquePtr(alloc, packed_b_size_, true); - if (packed_b_ == nullptr) { - return Status::OK(); - } std::memset(packed_b_.get(), 0, packed_b_size_); MlasNBitsGemmPackB(packed_b_.get(), qptr, nullptr, nullptr, N_, K_, K_, block_size_, static_cast(nbits_), is_asym_, false, compt_type, pool); diff --git a/onnxruntime/core/framework/sparse_tensor.cc b/onnxruntime/core/framework/sparse_tensor.cc index 5af2f4e4b543f..a3bcea4762d3e 100644 --- a/onnxruntime/core/framework/sparse_tensor.cc +++ b/onnxruntime/core/framework/sparse_tensor.cc @@ -220,7 +220,6 @@ Status SparseTensor::AllocateBuffer(int64_t buffer_size, size_t num_values) { ORT_RETURN_IF_NOT(buffer_size_t > values_bytes, "Values size ", static_cast(values_bytes), " must be less than total buffer size: ", buffer_size); auto data_ptr = IAllocator::MakeUniquePtr(allocator_, buffer_size_t); - ORT_RETURN_IF(data_ptr == nullptr, "SparseTensor Allocation failed for size: ", buffer_size); if (IsDataTypeString()) { // We own the buffer, so we must properly construct strings. Neither of the Tensors // we construct on top of the buffer own it. We are constructing empty strings, hopefully @@ -592,4 +591,4 @@ Status SparseTensor::Copy(const IDataTransfer& data_transfer, SparseTensor& dst_ } // namespace onnxruntime -#endif // !defined(DISABLE_SPARSE_TENSORS) \ No newline at end of file +#endif // !defined(DISABLE_SPARSE_TENSORS) From 41d7161430766df74300ac5531186929b1c682ef Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Thu, 28 Dec 2023 17:09:40 +1000 Subject: [PATCH 3/4] Fix bug in original code. Luckily the only current usages of MakeUniquePtrFromOrtAllocator are allocating `void` and the incorrect value didn't impact the allocation size's correctness. --- include/onnxruntime/core/framework/allocator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index 1aa7bcb7dd501..67ae6ece6327f 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -199,7 +199,7 @@ class IAllocator { alloc_size = CheckedCalcMemSizeForArray(count_or_bytes, size); } - T* p = static_cast(ort_allocator->Alloc(ort_allocator, count_or_bytes)); + T* p = static_cast(ort_allocator->Alloc(ort_allocator, alloc_size)); return IAllocatorUniquePtr{p, [ort_allocator](T* p) { ort_allocator->Free(ort_allocator, p); From b50ae7847d7726cfe77ddf47fae71e78f30e0261 Mon Sep 17 00:00:00 2001 From: Scott McKay Date: Tue, 2 Jan 2024 12:22:47 +1000 Subject: [PATCH 4/4] Address PR comments --- .../onnxruntime/core/framework/allocator.h | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/include/onnxruntime/core/framework/allocator.h b/include/onnxruntime/core/framework/allocator.h index 67ae6ece6327f..9015b23296e08 100644 --- a/include/onnxruntime/core/framework/allocator.h +++ b/include/onnxruntime/core/framework/allocator.h @@ -3,13 +3,14 @@ #pragma once +#include + #include "core/common/common.h" #include "core/framework/allocator_stats.h" +// some enums are defined in session/onnxruntime_c_api.h but used in ortdevice.h/ortmemory.h #include "core/session/onnxruntime_c_api.h" -#include "ortdevice.h" -#include "ortmemoryinfo.h" -#include -#include +#include "core/framework/ortdevice.h" +#include "core/framework/ortmemoryinfo.h" // This configures the arena based allocator used by ORT // See docs/C_API.md for details on what these mean and how to choose these values @@ -69,8 +70,12 @@ class IAllocator { IAllocator(const OrtMemoryInfo& info) : memory_info_(info) {} virtual ~IAllocator() = default; /** - @remarks Use SafeInt when calculating the size of memory to allocate using Alloc. - */ + * Allocate memory of the specified size. + * If size is 0, nullptr is returned. + * If allocation fails, an exception is thrown. + * + * @remarks Use SafeInt when calculating the size of memory to allocate using Alloc. + */ virtual void* Alloc(size_t size) = 0; virtual void Free(void* p) = 0; @@ -168,12 +173,14 @@ class IAllocator { if constexpr (!std::is_void::value) { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call - const auto size = sizeof(typename std::conditional::value, void*, T>::type); - alloc_size = CheckedCalcMemSizeForArray(count_or_bytes, size); + constexpr auto size = sizeof(typename std::conditional::value, void*, T>::type); + alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size); } // allocate T* p = static_cast(AllocateBufferWithOptions(*allocator, alloc_size, use_reserve, stream, std::move(wait_fn))); + ValidateAllocation(p, alloc_size); + return IAllocatorUniquePtr{p, [allocator = std::move(allocator)](T* p) { allocator->Free(p); @@ -195,11 +202,13 @@ class IAllocator { if constexpr (!std::is_void::value) { // sizeof(void) isn't valid, but the compiler isn't smart enough to ignore that this line isn't // reachable if T is void. use std::conditional to 'use' void* in the sizeof call - const auto size = sizeof(typename std::conditional::value, void*, T>::type); - alloc_size = CheckedCalcMemSizeForArray(count_or_bytes, size); + constexpr auto size = sizeof(typename std::conditional::value, void*, T>::type); + alloc_size = ValidatedCalcMemSizeForArray(count_or_bytes, size); } T* p = static_cast(ort_allocator->Alloc(ort_allocator, alloc_size)); + ValidateAllocation(p, alloc_size); + return IAllocatorUniquePtr{p, [ort_allocator](T* p) { ort_allocator->Free(ort_allocator, p); @@ -207,13 +216,16 @@ class IAllocator { } private: + // // validation functions. split out from methods that are templatized on the data type to minimize binary size. + // + template static void ValidateAllocator(const T& allocator) { ORT_ENFORCE(allocator != nullptr); } - static size_t CheckedCalcMemSizeForArray(size_t count, size_t size) { + static size_t ValidatedCalcMemSizeForArray(size_t count, size_t size) { size_t alloc_size = 0; if (!CalcMemSizeForArray(count, size, &alloc_size)) { ORT_THROW("Invalid size requested for allocation: ", count, " * ", size); @@ -222,6 +234,12 @@ class IAllocator { return alloc_size; } + static void ValidateAllocation(void* p, size_t size) { + // allocator should throw directly but in case it didn't ensure we do here so that calling code doesn't + // need to check for nullptr when an actual allocation was expected. + ORT_ENFORCE(p != nullptr || size == 0, "Memory allocation failed. Size=", size); + }; + OrtMemoryInfo memory_info_; };