Skip to content

Commit

Permalink
Revert "Fix TRT EP allocator memory leak (#16552)"
Browse files Browse the repository at this point in the history
This reverts commit d8792f8.
  • Loading branch information
yf711 committed Aug 7, 2023
1 parent 6c604c1 commit dd6b443
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 17 deletions.
16 changes: 3 additions & 13 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "core/providers/cuda/shared_inc/cuda_call.h"
#include "core/providers/cuda/math/unary_elementwise_ops_impl.h"
#include "core/providers/cuda/gpu_data_transfer.h"
#include "core/session/allocator_adapters.h"
#include "cuda_runtime_api.h"
#include "core/common/gsl.h"
#include <unordered_map>
Expand Down Expand Up @@ -992,12 +991,6 @@ TensorrtExecutionProvider::~TensorrtExecutionProvider() {
ORT_IGNORE_RETURN_VALUE(CUDA_CALL(cudaStreamDestroy(stream_)));
}
ReleaseTensorRTCustomOpDomainList(info_.custom_op_domain_list);

if (alloc_ != nullptr) {
// This code is the same as OrtApis::ReleaseAllocator defined in allocator_adapters.cc.
// We can't get the API inside the destructor, which is why the code is duplicated here.
delete static_cast<OrtAllocatorImpl*>(alloc_);
}
}

bool TensorrtExecutionProvider::IsGraphCaptureEnabled() const {
Expand Down Expand Up @@ -2266,18 +2259,15 @@ common::Status TensorrtExecutionProvider::Compile(const std::vector<FusedNodeAnd
auto trt_context = trt_state->context->get();
auto trt_profiles = trt_state->profiles;
auto max_context_mem_size_ptr = trt_state->max_context_mem_size_ptr;
OrtMemoryInfo mem_info("", OrtAllocatorType::OrtDeviceAllocator, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0));
OrtAllocator* alloc;
Ort::ThrowOnError(api->KernelContext_GetAllocator(context, &mem_info, &alloc));
int num_inputs = static_cast<int>(input_indexes.size());
int num_outputs = static_cast<int>(output_indexes.size());
bool engine_update = false;
std::unordered_set<std::string> input_names;
std::unordered_map<std::string, std::vector<int32_t>> tensor_shape_values;

OrtMemoryInfo mem_info("", OrtAllocatorType::OrtDeviceAllocator, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0));
if (alloc_ == nullptr) {
Ort::ThrowOnError(api->KernelContext_GetAllocator(context, &mem_info, &alloc_));
}
OrtAllocator* alloc = alloc_;

void* cuda_stream;
Ort::ThrowOnError(api->KernelContext_GetGPUComputeStream(context, &cuda_stream));
cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,6 @@ class TensorrtExecutionProvider : public IExecutionProvider {
bool detailed_build_log_ = false;
bool cuda_graph_enable_ = false;

// The OrtAllocator object is obtained during EP compute time
// and should be kept for the lifetime of the TRT EP object.
OrtAllocator* alloc_ = nullptr;

std::unique_ptr<CUDAGraph> cuda_graph_; // ORT TRT only supports CUDA graph when whole model is supported by TRT, so simply maintaining a CUDAGraph pointer is enough (no need to maintain one CUDAGraph pointer per TRT subgraph)
bool is_graph_captured_ = false;
int regular_run_count_before_graph_capture_ = 0;
Expand Down

0 comments on commit dd6b443

Please sign in to comment.