From ed344c73483c4bb30a15d4fae0c76423b77e0bd8 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 21 Jan 2022 05:22:28 +0000
Subject: [PATCH 01/25] expand model tests name

---
 onnxruntime/test/providers/cpu/model_tests.cc | 41 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index fbdb421e5fa46..1de031bfaded8 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -3,6 +3,9 @@
 #include <iostream>
 #include <iterator>
+#include <algorithm>
+#include <codecvt>
+#include <locale>
 #include "core/session/onnxruntime_c_api.h"
 #include "core/session/onnxruntime_cxx_api.h"
@@ -990,7 +993,43 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   return v;
 }
 
-INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()));
+auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
+  // use info.param here to generate the test suffix
+  std::basic_string<ORTCHAR_T> name = info.param;
+
+  // the original name here is the combination of provider name and model path name
+  // remove the trailing 'xxxxxxx/model.onnx' of name
+  if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
+    name = name.substr(0, info.param.size() - 11);
+  }
+  // remove the trailing 'xxxxxx.onnx' of name
+  else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
+    name = name.substr(0, info.param.size() - 5);
+  }
+
+  // Note: test name only accepts '_' and alphanumeric
+  // replace '/' or '\' with '_'
+  std::replace(name.begin(), name.end(), '/', '_');
+  std::replace(name.begin(), name.end(), '\\', '_');
+
+  // Note: test name only accepts '_' and alphanumeric
+  // remove '.' and '-'
+  char chars[] = ".-";
+  for (unsigned int i = 0; i < strlen(chars); ++i) {
+    name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
+  }
+#ifdef _WIN32
+  // Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
+  // Need conversion of wchar_t to char.
+  return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
+#else
+  return name;
+#endif
+};
+
+// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters.
+// Specify the last argument to make test name more meaningful and clear instead of just the sequential number.
+INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
 } // namespace test
 } // namespace onnxruntime
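For context before the next patch: ExpandModelName above plugs into googletest's INSTANTIATE_TEST_SUITE_P hook for custom test-name suffixes. Below is a minimal self-contained sketch of that mechanism; the fixture, parameter values, and the simplified sanitization are illustrative stand-ins, not code from this change.

#include <algorithm>
#include <cctype>
#include <string>
#include <gtest/gtest.h>

// Illustrative fixture: parameterized on a model path string.
class DemoModelTest : public ::testing::TestWithParam<std::string> {};

TEST_P(DemoModelTest, Runs) { SUCCEED(); }

// Mirrors the rules in the patch: strip a trailing ".onnx",
// then keep only characters gtest accepts ([A-Za-z0-9_]).
std::string DemoName(const ::testing::TestParamInfo<DemoModelTest::ParamType>& info) {
  std::string name = info.param;
  const std::string tail = ".onnx";
  if (name.size() > tail.size() &&
      name.compare(name.size() - tail.size(), tail.size(), tail) == 0) {
    name.resize(name.size() - tail.size());
  }
  std::replace(name.begin(), name.end(), '/', '_');
  name.erase(std::remove_if(name.begin(), name.end(),
                            [](unsigned char c) { return !std::isalnum(c) && c != '_'; }),
             name.end());
  return name;  // e.g. "cpu_squeezenet_model" instead of a bare index
}

INSTANTIATE_TEST_SUITE_P(DemoModels, DemoModelTest,
                         ::testing::Values("cpu/squeezenet/model.onnx", "cpu/mnist-8.onnx"),
                         DemoName);

Run under a gtest main, the reported names read DemoModels/DemoModelTest.Runs/cpu_squeezenet_model rather than an opaque /0, /1 suffix, which is exactly what makes the provider-based gtest_filter patterns in the next patch possible.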
From f3d977227dae5830184115386b3b69cff12d5bec Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 21 Jan 2022 05:22:56 +0000
Subject: [PATCH 02/25] skip cpu/cuda for trt when running onnxruntime_test_all

---
 cmake/onnxruntime_unittests.cmake | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 341d0a7ad6620..c8750a1e882c4 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -15,7 +15,7 @@ endif()
 set(disabled_warnings)
 
 function(AddTest)
-  cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS" ${ARGN})
+  cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS;TEST_ARGS" ${ARGN})
   list(REMOVE_DUPLICATES _UT_SOURCES)
 
   if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
@@ -93,7 +93,7 @@ function(AddTest)
     target_compile_options(${_UT_TARGET} PRIVATE "-Wno-error=uninitialized")
   endif()
 
-  set(TEST_ARGS)
+  set(TEST_ARGS ${_UT_TEST_ARGS})
   if (onnxruntime_GENERATE_TEST_REPORTS)
     # generate a report file next to the test program
     if (onnxruntime_BUILD_WEBASSEMBLY)
@@ -682,6 +682,11 @@ if (onnxruntime_BUILD_WEBASSEMBLY)
   endif()
 endif()
 
+set(test_all_args)
+if (onnxruntime_USE_TENSORRT)
+  list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
+endif ()
+
 AddTest(
   TARGET onnxruntime_test_all
   SOURCES ${all_tests} ${onnxruntime_unittest_main_src}
@@ -689,6 +694,7 @@ AddTest(
     onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
     onnx_test_data_proto nlohmann_json::nlohmann_json
   DEPENDS ${all_dependencies}
+  TEST_ARGS ${test_all_args}
 )
 if (MSVC)
   # The warning means the type of two integral values around a binary operator is narrow than their result.
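A note on the --gtest_filter value added above: ':' separates patterns and a leading '-' turns every pattern after it into an exclusion, so -*cpu__*:*cuda__* runs everything except tests whose full name matches *cpu__* or *cuda__*. The same filter can also be set through gtest's filter flag; the main() below is only an illustrative sketch, not part of this change:

#include <gtest/gtest.h>

int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  // Equivalent to passing --gtest_filter=-*cpu__*:*cuda__* on the command line:
  // run all tests except those matching *cpu__* or *cuda__*.
  ::testing::GTEST_FLAG(filter) = "-*cpu__*:*cuda__*";
  return RUN_ALL_TESTS();
}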
From 3e5267bb9527328d7a4ef92f41540357c1bd556f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 21 Jan 2022 21:53:45 +0000
Subject: [PATCH 03/25] only run trt ep for c++ unit test

---
 onnxruntime/test/providers/provider_test_utils.cc | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index f74768aee7b51..36f9f606eecb3 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -994,6 +994,11 @@ void OpTester::Run(
   std::vector<std::string> output_names;
   FillFeedsAndOutputNames(feeds, output_names);
   // Run the model
+#if defined(USE_TENSORRT)
+  static const std::string all_provider_types[] = {
+      kTensorrtExecutionProvider
+  };
+#else
   static const std::string all_provider_types[] = {
       kCpuExecutionProvider,
       kCudaExecutionProvider,
@@ -1008,6 +1013,7 @@ void OpTester::Run(
       kRocmExecutionProvider,
       kCoreMLExecutionProvider,
   };
+#endif
 
   bool has_run = false;

From 0034c553e68011ef4a3f72b6638b2cd42ab3466e Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Fri, 21 Jan 2022 23:10:32 +0000
Subject: [PATCH 04/25] Update CMAKE_CUDA_ARCHITECTURES for T4

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 3b89fa63cacdb..4bf6fd07db326 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -39,7 +39,7 @@ jobs:
               --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52;70;75
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
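For context on the CMAKE_CUDA_ARCHITECTURES values being changed here: each entry is a CUDA compute capability with the dot removed, so 52 targets Maxwell parts (such as the M60) while 70 and 75 add Volta and the Turing T4 this patch targets. A small host-side probe — an illustrative sketch assuming the CUDA runtime is installed, not part of this PR — prints what the local device reports, for comparison against that list:

#include <cstdio>
#include <cuda_runtime.h>

int main() {
  cudaDeviceProp prop;
  // Query device 0; report cleanly if no CUDA device is visible.
  if (cudaGetDeviceProperties(&prop, 0) != cudaSuccess) {
    std::printf("no CUDA device visible\n");
    return 1;
  }
  // prop.major/prop.minor form the compute capability, e.g. 7.5 -> sm_75 on a T4.
  std::printf("compute capability sm_%d%d\n", prop.major, prop.minor);
  return 0;
}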
From ed5e28cd3f4f726d4ab390263480d9208cd9abec Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 22 Jan 2022 01:20:56 +0000
Subject: [PATCH 05/25] Use new t4 agent pool

---
 onnxruntime/test/providers/provider_test_utils.cc             | 1 +
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index 36f9f606eecb3..b6adf3d12d87e 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -996,6 +996,7 @@ void OpTester::Run(
   // Run the model
 #if defined(USE_TENSORRT)
   static const std::string all_provider_types[] = {
+      kCpuExecutionProvider,
       kTensorrtExecutionProvider
   };
 #else
diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 4bf6fd07db326..a58d9d33fc076 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -1,6 +1,6 @@
 jobs:
 - job: Linux_CI_GPU_TENSORRT_Dev
-  pool: onnxruntime-tensorrt-linuxbuild
+  pool: onnxruntime-tensorrt-linuxbuild-t4
   variables:
     ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'
   timeoutInMinutes: 180

From dd70a4bc5b799b1177a9bc0471348ca10dcd016f Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 22 Jan 2022 01:26:17 +0000
Subject: [PATCH 06/25] Update YAML for run T4 on Windows

---
 .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
index ad6a5d2a4d555..24693a1e77bab 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
@@ -1,6 +1,6 @@
 jobs:
 - job: 'build'
-  pool: 'onnxruntime-tensorrt8-winbuild'
+  pool: 'onnxruntime-tensorrt8-winbuild-t4'
   variables:
     OrtPackageId: 'Microsoft.ML.OnnxRuntime'
     MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary'
@@ -47,7 +47,7 @@ jobs:
       displayName: 'Generate cmake config'
       inputs:
        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52'
+        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52;70;75'
       workingDirectory: '$(Build.BinariesDirectory)'
 
 - task: VSBuild@1

From 8ac7ef611feb4e406712ae8895661a237a46a932 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 22 Jan 2022 17:56:54 +0000
Subject: [PATCH 07/25] revert code

---
 onnxruntime/test/providers/provider_test_utils.cc | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index b6adf3d12d87e..f74768aee7b51 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -994,12 +994,6 @@ void OpTester::Run(
   std::vector<std::string> output_names;
   FillFeedsAndOutputNames(feeds, output_names);
   // Run the model
-#if defined(USE_TENSORRT)
-  static const std::string all_provider_types[] = {
-      kCpuExecutionProvider,
-      kTensorrtExecutionProvider
-  };
-#else
   static const std::string all_provider_types[] = {
       kCpuExecutionProvider,
       kCudaExecutionProvider,
@@ -1014,7 +1008,6 @@ void OpTester::Run(
       kRocmExecutionProvider,
       kCoreMLExecutionProvider,
   };
-#endif
 
   bool has_run = false;
From 0c897b282541da508ba33d914f1e559e36021d2d Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 22 Jan 2022 20:12:28 +0000
Subject: [PATCH 08/25] Update CMAKE_CUDA_ARCHITECTURES

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index a58d9d33fc076..4f3b62dbc0243 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -39,7 +39,7 @@ jobs:
              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52;70;75
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
From 0dfab8ed94a0b14baf54692a533051ab0133f6b1 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sun, 23 Jan 2022 01:43:27 +0000
Subject: [PATCH 09/25] fix wrong value

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 4f3b62dbc0243..2c1212a48de70 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -39,7 +39,7 @@ jobs:
              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=52;70;75"
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
index 24693a1e77bab..597cbcd81fe2b 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
@@ -47,7 +47,7 @@ jobs:
      displayName: 'Generate cmake config'
      inputs:
        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52;70;75'
+        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="52;70;75"'
       workingDirectory: '$(Build.BinariesDirectory)'
 
 - task: VSBuild@1
From 333568a8d216fc5d460ab2d7a93489a28d73aa72 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 24 Jan 2022 22:35:59 +0000
Subject: [PATCH 10/25] Remove cpu/cuda directly in model tests

---
 cmake/onnxruntime_unittests.cmake             |  6 +-
 onnxruntime/test/providers/cpu/model_tests.cc | 66 ++++++++++---------
 2 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index c8750a1e882c4..50d86b30f175b 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -683,9 +683,9 @@ if (onnxruntime_BUILD_WEBASSEMBLY)
   endif()
 endif()
 
 set(test_all_args)
-if (onnxruntime_USE_TENSORRT)
-  list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
-endif ()
+#if (onnxruntime_USE_TENSORRT)
+  #list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
+#endif ()
 
 AddTest(
   TARGET onnxruntime_test_all
diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 1de031bfaded8..135c0fc05f78f 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -713,40 +713,42 @@ TEST_P(ModelTest, Run) {
 
 // TODO: all providers
 ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   std::vector<std::basic_string<ORTCHAR_T>> provider_names;
-  provider_names.push_back(ORT_TSTR("cpu"));
 #ifdef USE_TENSORRT
+  // If TRT EP is built, we want to test TRT EP only to save CI time.
   provider_names.push_back(ORT_TSTR("tensorrt"));
-#endif
-#ifdef USE_MIGRAPHX
-  provider_names.push_back(ORT_TSTR("migraphx"));
-#endif
-#ifdef USE_OPENVINO
-  provider_names.push_back(ORT_TSTR("openvino"));
-#endif
-#ifdef USE_CUDA
-  provider_names.push_back(ORT_TSTR("cuda"));
-#endif
-#ifdef USE_ROCM
-  provider_names.push_back(ORT_TSTR("rocm"));
-#endif
-#ifdef USE_DNNL
-  provider_names.push_back(ORT_TSTR("dnnl"));
-#endif
-#ifdef USE_NUPHAR
-  provider_names.push_back(ORT_TSTR("nuphar"));
-#endif
-// For any non-Android system, NNAPI will only be used for ort model converter
-#if defined(USE_NNAPI) && defined(__ANDROID__)
-  provider_names.push_back(ORT_TSTR("nnapi"));
-#endif
-#ifdef USE_RKNPU
-  provider_names.push_back(ORT_TSTR("rknpu"));
-#endif
-#ifdef USE_ACL
-  provider_names.push_back(ORT_TSTR("acl"));
-#endif
-#ifdef USE_ARMNN
-  provider_names.push_back(ORT_TSTR("armnn"));
+#else
+  provider_names.push_back(ORT_TSTR("cpu"));
+  #ifdef USE_MIGRAPHX
+  provider_names.push_back(ORT_TSTR("migraphx"));
+  #endif
+  #ifdef USE_OPENVINO
+  provider_names.push_back(ORT_TSTR("openvino"));
+  #endif
+  #ifdef USE_CUDA
+  provider_names.push_back(ORT_TSTR("cuda"));
+  #endif
+  #ifdef USE_ROCM
+  provider_names.push_back(ORT_TSTR("rocm"));
+  #endif
+  #ifdef USE_DNNL
+  provider_names.push_back(ORT_TSTR("dnnl"));
+  #endif
+  #ifdef USE_NUPHAR
+  provider_names.push_back(ORT_TSTR("nuphar"));
+  #endif
+  // For any non-Android system, NNAPI will only be used for ort model converter
+  #if defined(USE_NNAPI) && defined(__ANDROID__)
+  provider_names.push_back(ORT_TSTR("nnapi"));
+  #endif
+  #ifdef USE_RKNPU
+  provider_names.push_back(ORT_TSTR("rknpu"));
+  #endif
+  #ifdef USE_ACL
+  provider_names.push_back(ORT_TSTR("acl"));
+  #endif
+  #ifdef USE_ARMNN
+  provider_names.push_back(ORT_TSTR("armnn"));
+  #endif
 #endif
   std::vector<std::basic_string<ORTCHAR_T>> v;

From e9f48a221a13bc7b58d1072ae6ac8c70aaf30f48 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 24 Jan 2022 22:42:59 +0000
Subject: [PATCH 11/25] add only CMAKE_CUDA_ARCHITECTURES=75

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 2c1212a48de70..3c12697973231 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -39,7 +39,7 @@ jobs:
              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=52;70;75"
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=75"
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
From 0dfab8ed94a0b14baf54692a533051ab0133f6b1 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 25 Jan 2022 00:56:04 +0000
Subject: [PATCH 12/25] remove expanding model test name to see difference

---
 onnxruntime/test/providers/cpu/model_tests.cc | 63 ++++++++++---------
 1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 135c0fc05f78f..8fe57393cc3b7 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -995,43 +995,44 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   return v;
 }
 
-auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
-  // use info.param here to generate the test suffix
-  std::basic_string<ORTCHAR_T> name = info.param;
+//auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
+  //// use info.param here to generate the test suffix
+  //std::basic_string<ORTCHAR_T> name = info.param;
 
-  // the original name here is the combination of provider name and model path name
-  // remove the trailing 'xxxxxxx/model.onnx' of name
-  if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
-    name = name.substr(0, info.param.size() - 11);
-  }
-  // remove the trailing 'xxxxxx.onnx' of name
-  else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
-    name = name.substr(0, info.param.size() - 5);
-  }
+  //// the original name here is the combination of provider name and model path name
+  //// remove the trailing 'xxxxxxx/model.onnx' of name
+  //if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
+  //name = name.substr(0, info.param.size() - 11);
+  //}
+  //// remove the trailing 'xxxxxx.onnx' of name
+  //else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
+  //name = name.substr(0, info.param.size() - 5);
+  //}
 
-  // Note: test name only accepts '_' and alphanumeric
-  // replace '/' or '\' with '_'
-  std::replace(name.begin(), name.end(), '/', '_');
-  std::replace(name.begin(), name.end(), '\\', '_');
+  //// Note: test name only accepts '_' and alphanumeric
+  //// replace '/' or '\' with '_'
+  //std::replace(name.begin(), name.end(), '/', '_');
+  //std::replace(name.begin(), name.end(), '\\', '_');
 
-  // Note: test name only accepts '_' and alphanumeric
-  // remove '.' and '-'
-  char chars[] = ".-";
-  for (unsigned int i = 0; i < strlen(chars); ++i) {
-    name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
-  }
-#ifdef _WIN32
-  // Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
-  // Need conversion of wchar_t to char.
-  return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
-#else
-  return name;
-#endif
-};
+  //// Note: test name only accepts '_' and alphanumeric
+  //// remove '.' and '-'
+  //char chars[] = ".-";
+  //for (unsigned int i = 0; i < strlen(chars); ++i) {
+  //name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
+  //}
+//#ifdef _WIN32
+  //// Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
+  //// Need conversion of wchar_t to char.
+  //return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
+//#else
+  //return name;
+//#endif
+//};
 
 // The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters.
 // Specify the last argument to make test name more meaningful and clear instead of just the sequential number.
-INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
+//INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
+INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()));
 } // namespace test
 } // namespace onnxruntime

From e069b3c18cf7fefcb237ffbbd8f5cd8e15ae97e0 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 25 Jan 2022 23:00:32 +0000
Subject: [PATCH 13/25] revert code

---
 cmake/onnxruntime_unittests.cmake             |   6 +-
 onnxruntime/test/providers/cpu/model_tests.cc | 129 +++++++++---------
 2 files changed, 66 insertions(+), 69 deletions(-)

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 50d86b30f175b..c8750a1e882c4 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -683,9 +683,9 @@ if (onnxruntime_BUILD_WEBASSEMBLY)
   endif()
 endif()
 
 set(test_all_args)
-#if (onnxruntime_USE_TENSORRT)
-  #list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
-#endif ()
+if (onnxruntime_USE_TENSORRT)
+  list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
+endif ()
 
 AddTest(
   TARGET onnxruntime_test_all
diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 8fe57393cc3b7..1de031bfaded8 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -713,42 +713,40 @@ TEST_P(ModelTest, Run) {
 
 // TODO: all providers
 ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   std::vector<std::basic_string<ORTCHAR_T>> provider_names;
+  provider_names.push_back(ORT_TSTR("cpu"));
 #ifdef USE_TENSORRT
-  // If TRT EP is built, we want to test TRT EP only to save CI time.
   provider_names.push_back(ORT_TSTR("tensorrt"));
-#else
-  provider_names.push_back(ORT_TSTR("cpu"));
-  #ifdef USE_MIGRAPHX
-  provider_names.push_back(ORT_TSTR("migraphx"));
-  #endif
-  #ifdef USE_OPENVINO
-  provider_names.push_back(ORT_TSTR("openvino"));
-  #endif
-  #ifdef USE_CUDA
-  provider_names.push_back(ORT_TSTR("cuda"));
-  #endif
-  #ifdef USE_ROCM
-  provider_names.push_back(ORT_TSTR("rocm"));
-  #endif
-  #ifdef USE_DNNL
-  provider_names.push_back(ORT_TSTR("dnnl"));
-  #endif
-  #ifdef USE_NUPHAR
-  provider_names.push_back(ORT_TSTR("nuphar"));
-  #endif
-  // For any non-Android system, NNAPI will only be used for ort model converter
-  #if defined(USE_NNAPI) && defined(__ANDROID__)
-  provider_names.push_back(ORT_TSTR("nnapi"));
-  #endif
-  #ifdef USE_RKNPU
-  provider_names.push_back(ORT_TSTR("rknpu"));
-  #endif
-  #ifdef USE_ACL
-  provider_names.push_back(ORT_TSTR("acl"));
-  #endif
-  #ifdef USE_ARMNN
-  provider_names.push_back(ORT_TSTR("armnn"));
-  #endif
+#endif
+#ifdef USE_MIGRAPHX
+  provider_names.push_back(ORT_TSTR("migraphx"));
+#endif
+#ifdef USE_OPENVINO
+  provider_names.push_back(ORT_TSTR("openvino"));
+#endif
+#ifdef USE_CUDA
+  provider_names.push_back(ORT_TSTR("cuda"));
+#endif
+#ifdef USE_ROCM
+  provider_names.push_back(ORT_TSTR("rocm"));
+#endif
+#ifdef USE_DNNL
+  provider_names.push_back(ORT_TSTR("dnnl"));
+#endif
+#ifdef USE_NUPHAR
+  provider_names.push_back(ORT_TSTR("nuphar"));
+#endif
+// For any non-Android system, NNAPI will only be used for ort model converter
+#if defined(USE_NNAPI) && defined(__ANDROID__)
+  provider_names.push_back(ORT_TSTR("nnapi"));
+#endif
+#ifdef USE_RKNPU
+  provider_names.push_back(ORT_TSTR("rknpu"));
+#endif
+#ifdef USE_ACL
+  provider_names.push_back(ORT_TSTR("acl"));
+#endif
+#ifdef USE_ARMNN
+  provider_names.push_back(ORT_TSTR("armnn"));
 #endif
   std::vector<std::basic_string<ORTCHAR_T>> v;
@@ -995,44 +993,43 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   return v;
 }
 
-//auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
-  //// use info.param here to generate the test suffix
-  //std::basic_string<ORTCHAR_T> name = info.param;
+auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
+  // use info.param here to generate the test suffix
+  std::basic_string<ORTCHAR_T> name = info.param;
 
-  //// the original name here is the combination of provider name and model path name
-  //// remove the trailing 'xxxxxxx/model.onnx' of name
-  //if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
-  //name = name.substr(0, info.param.size() - 11);
-  //}
-  //// remove the trailing 'xxxxxx.onnx' of name
-  //else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
-  //name = name.substr(0, info.param.size() - 5);
-  //}
+  // the original name here is the combination of provider name and model path name
+  // remove the trailing 'xxxxxxx/model.onnx' of name
+  if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
+    name = name.substr(0, info.param.size() - 11);
+  }
+  // remove the trailing 'xxxxxx.onnx' of name
+  else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
+    name = name.substr(0, info.param.size() - 5);
+  }
 
-  //// Note: test name only accepts '_' and alphanumeric
-  //// replace '/' or '\' with '_'
-  //std::replace(name.begin(), name.end(), '/', '_');
-  //std::replace(name.begin(), name.end(), '\\', '_');
+  // Note: test name only accepts '_' and alphanumeric
+  // replace '/' or '\' with '_'
+  std::replace(name.begin(), name.end(), '/', '_');
+  std::replace(name.begin(), name.end(), '\\', '_');
 
-  //// Note: test name only accepts '_' and alphanumeric
-  //// remove '.' and '-'
-  //char chars[] = ".-";
-  //for (unsigned int i = 0; i < strlen(chars); ++i) {
-  //name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
-  //}
-//#ifdef _WIN32
-  //// Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
-  //// Need conversion of wchar_t to char.
-  //return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
-//#else
-  //return name;
-//#endif
-//};
+  // Note: test name only accepts '_' and alphanumeric
+  // remove '.' and '-'
+  char chars[] = ".-";
+  for (unsigned int i = 0; i < strlen(chars); ++i) {
+    name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
+  }
+#ifdef _WIN32
+  // Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
+  // Need conversion of wchar_t to char.
+  return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
+#else
+  return name;
+#endif
+};
 
 // The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters.
 // Specify the last argument to make test name more meaningful and clear instead of just the sequential number.
-//INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
-INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()));
+INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
 } // namespace test
 } // namespace onnxruntime

From 2c6ccd9a5ed4597cce99b824f37ce229b270aebb Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 25 Jan 2022 23:00:52 +0000
Subject: [PATCH 14/25] Add fallback execution provider for unit test

---
 onnxruntime/test/providers/provider_test_utils.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index f74768aee7b51..a9b7c328710dd 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -994,6 +994,11 @@ void OpTester::Run(
   std::vector<std::string> output_names;
   FillFeedsAndOutputNames(feeds, output_names);
   // Run the model
+#ifdef USE_TENSORRT
+  static const std::string all_provider_types[] = {
+      kTensorrtExecutionProvider,
+  };
+#else
   static const std::string all_provider_types[] = {
       kCpuExecutionProvider,
       kCudaExecutionProvider,
@@ -1008,6 +1013,7 @@ void OpTester::Run(
       kRocmExecutionProvider,
       kCoreMLExecutionProvider,
   };
+#endif
 
   bool has_run = false;
 
@@ -1056,8 +1062,10 @@ void OpTester::Run(
     } else {
       for (const std::string& provider_type : all_provider_types) {
+#ifndef USE_TENSORRT
         if (excluded_provider_types.count(provider_type) > 0)
           continue;
+#endif
 
         cur_provider = provider_type;
 
@@ -1146,6 +1154,10 @@ void OpTester::Run(
           has_run = true;
 
           ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider)));
+#ifdef USE_TENSORRT
+          std::unique_ptr<IExecutionProvider> fallback_execution_provider = DefaultCudaExecutionProvider();
+          ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(fallback_execution_provider)));
+#endif
 
           fetches_ = ExecuteModel(
               *p_model, session_object, expect_result, expected_failure_string,
               run_options, feeds, output_names, provider_type, allow_released_onnx_opset_only);

From a5e6a827bca3abbc5ddf422aba6e18ffbf268ea2 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Tue, 25 Jan 2022 23:20:53 +0000
Subject: [PATCH 15/25] Add fallback execution provider for unit test (cont)

---
 onnxruntime/test/providers/provider_test_utils.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index a9b7c328710dd..a215118d97b7a 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -1062,10 +1062,8 @@ void OpTester::Run(
     } else {
       for (const std::string& provider_type : all_provider_types) {
-#ifndef USE_TENSORRT
         if (excluded_provider_types.count(provider_type) > 0)
           continue;
-#endif
 
         cur_provider = provider_type;
 
@@ -1180,8 +1178,12 @@ void OpTester::Run(
         cur_provider = "not set";
       }
 
+#ifdef USE_TENSORRT
+      ORT_UNUSED_PARAMETER(has_run);
+#else
       EXPECT_TRUE(has_run)
           << "No registered execution providers were able to run the model.";
+#endif
     }
   }
   ORT_CATCH(const std::exception& ex) {

From 6d228867153149041b41458eae4593c069e413eb Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 26 Jan 2022 02:16:07 +0000
Subject: [PATCH 16/25] add conditional to add fallback cuda ep

---
 onnxruntime/test/providers/provider_test_utils.cc | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index a215118d97b7a..43f1ad4c04838 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -1153,8 +1153,10 @@ void OpTester::Run(
           ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider)));
 #ifdef USE_TENSORRT
-          std::unique_ptr<IExecutionProvider> fallback_execution_provider = DefaultCudaExecutionProvider();
-          ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(fallback_execution_provider)));
+          if (excluded_provider_types.count(onnxruntime::kCudaExecutionProvider) == 0) {
+            std::unique_ptr<IExecutionProvider> fallback_execution_provider = DefaultCudaExecutionProvider();
+            ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(fallback_execution_provider)));
+          }
 #endif
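For context on why CUDA is registered right after TensorRT above: session execution providers are consulted in registration order, so CUDA acts as a fallback for anything TensorRT does not take. A toy sketch of that first-match-wins assignment follows — the supports() predicates and op names are made up for illustration; this is not the ORT graph-partitioning code:

#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Provider {
  std::string name;
  std::function<bool(const std::string&)> supports;
};

int main() {
  // Registration order encodes priority: TensorRT first, CUDA as the fallback.
  std::vector<Provider> providers = {
      {"tensorrt", [](const std::string& op) { return op != "NonMaxSuppression"; }},
      {"cuda", [](const std::string&) { return true; }},
  };
  for (const std::string op : {"Conv", "MatMul", "NonMaxSuppression"}) {
    for (const Provider& p : providers) {
      if (p.supports(op)) {
        // Each node goes to the first provider that claims it.
        std::cout << op << " -> " << p.name << "\n";
        break;
      }
    }
  }
  return 0;
}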
From efe6d31e4e3aa1641f5d2bd451bd9bd423c90710 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 26 Jan 2022 18:34:01 +0000
Subject: [PATCH 17/25] Reduction op takes much longer time for TRT 8.2, so we
 test smaller range of inputs

---
 .../cpu/reduction/reduction_ops_test.cc       | 27 ++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
index f7ffdf8b91661..b45818d21b402 100644
--- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
@@ -1555,9 +1555,16 @@ void test_apex_reduce_sum(
 TEST(ReductionOpTest, ReduceSum_apex_matrix_large) {
   for (int64_t m = 1; m < 2049; m *= 8) {
     for (int64_t n = 2; n < 2049; n *= 8) {
+#ifdef USE_TENSORRT
+      // Reduction op takes much longer time for TRT 8.2, so we test smaller range of inputs.
+      if (m * n > 4096) {
+        continue;
+      }
+#else
       if (m * n > 32768) {
         continue;
       }
+#endif
       test_apex_reduce_sum(m, n);
       test_apex_reduce_sum(m + 1, n);
       test_apex_reduce_sum(m + 3, n);
@@ -1583,7 +1590,13 @@ TEST(ReductionOpTest, ReduceSum_batch_by_two) {
 }
 
 TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_128) {
-  for (int i = 1; i < 16; i += 1) {
+#ifdef USE_TENSORRT
+  // Reduction op takes much longer time for TRT 8.2, so we test smaller range of inputs.
+  int i_max = 8;
+#else
+  int i_max = 16;
+#endif
+  for (int i = 1; i < i_max; i += 1) {
    test_apex_reduce_sum(i * 128, 128);
    test_apex_reduce_sum(i * 512, 128);
    test_apex_reduce_sum(i * 128, 768);
@@ -1612,8 +1625,16 @@ TEST(ReductionOpTest, ReduceSum_bert_selected_batch_size) {
 
 TEST(ReductionOpTest, ReduceSum_apex_more) {
   std::srand(0);
-  for (int64_t m = 1; m < 16; ++m) {
-    for (int64_t n = 1; n < 16; ++n) {
+#ifdef USE_TENSORRT
+  // Reduction op takes much longer time for TRT 8.2, so we test smaller range of inputs.
+  int64_t m_max = 8;
+  int64_t n_max = 8;
+#else
+  int64_t m_max = 16;
+  int64_t n_max = 16;
+#endif
+  for (int64_t m = 1; m < m_max; ++m) {
+    for (int64_t n = 1; n < n_max; ++n) {
       const auto m_ = 2 * m;
       const auto n_ = 2 * n;
       test_apex_reduce_sum(m_, n_);
From 3b5a9985a752861d5565ad0c021da4cf3b6c173d Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Wed, 26 Jan 2022 18:38:02 +0000
Subject: [PATCH 18/25] use M60

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 4 ++--
 .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 3c12697973231..5404dfb167808 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -1,6 +1,6 @@
 jobs:
 - job: Linux_CI_GPU_TENSORRT_Dev
-  pool: onnxruntime-tensorrt-linuxbuild-t4
+  pool: onnxruntime-tensorrt-linuxbuild
   variables:
     ALLOW_RELEASED_ONNX_OPSET_ONLY: '1'
   timeoutInMinutes: 180
@@ -39,7 +39,7 @@ jobs:
              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=75"
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=52"
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
index 597cbcd81fe2b..7ec488cee870f 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
@@ -1,6 +1,6 @@
 jobs:
 - job: 'build'
-  pool: 'onnxruntime-tensorrt8-winbuild-t4'
+  pool: 'onnxruntime-tensorrt8-winbuild'
   variables:
     OrtPackageId: 'Microsoft.ML.OnnxRuntime'
     MsbuildArguments: '-detailedsummary -maxcpucount -consoleloggerparameters:PerformanceSummary'
@@ -47,7 +47,7 @@ jobs:
      displayName: 'Generate cmake config'
      inputs:
        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="52;70;75"'
+        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="52"'
       workingDirectory: '$(Build.BinariesDirectory)'
 
 - task: VSBuild@1
From f0e643cd54d0ed676b58fa44e3a6aecbcaa63f94 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 27 Jan 2022 06:02:36 +0000
Subject: [PATCH 19/25] revert code

---
 onnxruntime/test/providers/cpu/model_tests.cc | 41 +------------------
 1 file changed, 1 insertion(+), 40 deletions(-)

diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index 1de031bfaded8..fbdb421e5fa46 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -3,9 +3,6 @@
 #include <iostream>
 #include <iterator>
-#include <algorithm>
-#include <codecvt>
-#include <locale>
 #include "core/session/onnxruntime_c_api.h"
 #include "core/session/onnxruntime_cxx_api.h"
@@ -993,43 +990,7 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
   return v;
 }
 
-auto ExpandModelName = [](const ::testing::TestParamInfo<ModelTest::ParamType>& info) {
-  // use info.param here to generate the test suffix
-  std::basic_string<ORTCHAR_T> name = info.param;
-
-  // the original name here is the combination of provider name and model path name
-  // remove the trailing 'xxxxxxx/model.onnx' of name
-  if (name.size() > 11 && name.substr(name.size() - 11) == ORT_TSTR("/model.onnx")) {
-    name = name.substr(0, info.param.size() - 11);
-  }
-  // remove the trailing 'xxxxxx.onnx' of name
-  else if (name.size() > 5 && name.substr(name.size() - 5) == ORT_TSTR(".onnx")) {
-    name = name.substr(0, info.param.size() - 5);
-  }
-
-  // Note: test name only accepts '_' and alphanumeric
-  // replace '/' or '\' with '_'
-  std::replace(name.begin(), name.end(), '/', '_');
-  std::replace(name.begin(), name.end(), '\\', '_');
-
-  // Note: test name only accepts '_' and alphanumeric
-  // remove '.' and '-'
-  char chars[] = ".-";
-  for (unsigned int i = 0; i < strlen(chars); ++i) {
-    name.erase(std::remove(name.begin(), name.end(), chars[i]), name.end());
-  }
-#ifdef _WIN32
-  // Note: The return value of INSTANTIATE_TEST_SUITE_P accepts std::basic_string.
-  // Need conversion of wchar_t to char.
-  return std::wstring_convert<std::codecvt_utf8<wchar_t>>().to_bytes(name);
-#else
-  return name;
-#endif
-};
-
-// The optional last argument is a function or functor that generates custom test name suffixes based on the test parameters.
-// Specify the last argument to make test name more meaningful and clear instead of just the sequential number.
-INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()), ExpandModelName);
+INSTANTIATE_TEST_SUITE_P(ModelTests, ModelTest, testing::ValuesIn(GetParameterStrings()));
 } // namespace test
 } // namespace onnxruntime

From daa6a1ca5ec6322d58716df0f41e54cfd8291d0d Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 27 Jan 2022 06:06:34 +0000
Subject: [PATCH 20/25] revert code

---
 .../github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml | 2 +-
 .../github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
index 5404dfb167808..3b89fa63cacdb 100644
--- a/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-gpu-tensorrt-ci-pipeline.yml
@@ -39,7 +39,7 @@ jobs:
              --enable_onnx_tests --use_cuda --cuda_version=11.4 --cuda_home=/usr/local/cuda-11.4 --cudnn_home=/usr/local/cuda-11.4 \
              --enable_pybind --build_java --build_nodejs \
              --use_tensorrt --tensorrt_home /usr \
-              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc "CMAKE_CUDA_ARCHITECTURES=52"
+              --cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-10/root/usr/bin/cc CMAKE_CUDA_ARCHITECTURES=52
           workingDirectory: $(Build.SourcesDirectory)
       - task: PublishTestResults@2
         displayName: 'Publish unit test results'
diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
index 7ec488cee870f..ad6a5d2a4d555 100644
--- a/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/win-gpu-tensorrt-ci-pipeline.yml
@@ -47,7 +47,7 @@ jobs:
      displayName: 'Generate cmake config'
      inputs:
        scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py'
-        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES="52"'
+        arguments: '--config $(BuildConfig) --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 16 2019" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="C:\local\TensorRT-8.2.1.8.Windows10.x86_64.cuda-11.4.cudnn8.2" --cuda_version=11.4 --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.4" --cudnn_home="C:\local\cudnn-11.4-windows-x64-v8.2.2.26\cuda" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=52'
       workingDirectory: '$(Build.BinariesDirectory)'
 
 - task: VSBuild@1

From 18c5dd883e62e07a616eb582618c5e3fac080159 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 27 Jan 2022 06:21:23 +0000
Subject: [PATCH 21/25] add comments

---
 .../providers/cpu/reduction/reduction_ops_test.cc | 15 +++++++--------
 onnxruntime/test/providers/provider_test_utils.cc |  2 ++
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
index 7ed05b7ba52a0..561f86f86228f 100644
--- a/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
+++ b/onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
@@ -1553,18 +1553,17 @@ void test_apex_reduce_sum(
 }
 
 TEST(ReductionOpTest, ReduceSum_apex_matrix_large) {
-  for (int64_t m = 1; m < 2049; m *= 8) {
-    for (int64_t n = 2; n < 2049; n *= 8) {
 #ifdef USE_TENSORRT
-      // Reduction op takes much longer time for TRT 8.2, so we test smaller range of inputs.
-      if (m * n > 4096) {
-        continue;
-      }
+  // Reduction op takes much longer time for TRT 8.2, so we test smaller range of inputs.
+  int64_t threshold = 4096;
 #else
-      if (m * n > 32768) {
+  int64_t threshold = 32768;
+#endif
+  for (int64_t m = 1; m < 2049; m *= 8) {
+    for (int64_t n = 2; n < 2049; n *= 8) {
+      if (m * n > threshold) {
         continue;
       }
-#endif
       test_apex_reduce_sum(m, n);
       test_apex_reduce_sum(m + 1, n);
       test_apex_reduce_sum(m + 3, n);
diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index d403fe2fd20b9..85c15a5c430d9 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -995,6 +995,7 @@ void OpTester::Run(
   FillFeedsAndOutputNames(feeds, output_names);
   // Run the model
 #ifdef USE_TENSORRT
+  // Only include trt ep to reduce test time (with cuda ep as fallback ep if cuda ep is not in the excluded_provider_types)
   static const std::string all_provider_types[] = {
       kTensorrtExecutionProvider,
   };
@@ -1153,6 +1154,7 @@ void OpTester::Run(
           ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider)));
 #ifdef USE_TENSORRT
+          // make cuda ep as fallback ep for trt ep if cuda ep is not in the excluded_provider_types
           if (excluded_provider_types.count(onnxruntime::kCudaExecutionProvider) == 0) {
            std::unique_ptr<IExecutionProvider> fallback_execution_provider = DefaultCudaExecutionProvider();
            ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(fallback_execution_provider)));

From 93013ec68ed838059dc0de5b07f5305a3ef2d121 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 27 Jan 2022 18:47:59 +0000
Subject: [PATCH 22/25] Modify code and add comment

---
 cmake/onnxruntime_unittests.cmake                 | 2 ++
 onnxruntime/test/providers/provider_test_utils.cc | 7 -------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 5f54400c8dea3..9afbec00d6ff8 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -684,6 +684,8 @@ endif()
 
 set(test_all_args)
 if (onnxruntime_USE_TENSORRT)
+  # TRT EP CI takes much longer time when updating to TRT 8.2
+  # So, we only run trt ep and exclude other eps to reduce CI test time.
   list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
 endif ()
 
diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index 85c15a5c430d9..dd8a4f1a2c497 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -1153,13 +1153,6 @@ void OpTester::Run(
           has_run = true;
 
           ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(execution_provider)));
-#ifdef USE_TENSORRT
-          // make cuda ep as fallback ep for trt ep if cuda ep is not in the excluded_provider_types
-          if (excluded_provider_types.count(onnxruntime::kCudaExecutionProvider) == 0) {
-            std::unique_ptr<IExecutionProvider> fallback_execution_provider = DefaultCudaExecutionProvider();
-            ASSERT_PROVIDER_STATUS_OK(session_object.RegisterExecutionProvider(std::move(fallback_execution_provider)));
-          }
-#endif
 
           fetches_ = ExecuteModel(
               *p_model, session_object, expect_result, expected_failure_string,
               run_options, feeds, output_names, provider_type, allow_released_onnx_opset_only);

From 2a75eb862553eeb7271c017ef823351a0c5b7e6a Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Thu, 27 Jan 2022 22:06:35 +0000
Subject: [PATCH 23/25] modify comment

---
 onnxruntime/test/providers/provider_test_utils.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index dd8a4f1a2c497..badc81529fcd7 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -995,7 +995,7 @@ void OpTester::Run(
   FillFeedsAndOutputNames(feeds, output_names);
   // Run the model
 #ifdef USE_TENSORRT
-  // Only include trt ep to reduce test time (with cuda ep as fallback ep if cuda ep is not in the excluded_provider_types)
+  // only run trt ep to reduce test time
   static const std::string all_provider_types[] = {
       kTensorrtExecutionProvider,
   };

From 8880456377b141552b7e2d8e6e7a46db9bb2fb01 Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Sat, 29 Jan 2022 21:38:12 +0000
Subject: [PATCH 24/25] update comment

---
 cmake/onnxruntime_unittests.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 9afbec00d6ff8..bfd4b7e755cb5 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -686,6 +686,10 @@ set(test_all_args)
 if (onnxruntime_USE_TENSORRT)
   # TRT EP CI takes much longer time when updating to TRT 8.2
   # So, we only run trt ep and exclude other eps to reduce CI test time.
+  #
+  # The test names of model tests were using sequential number in the past.
+  # This PR https://github.com/microsoft/onnxruntime/pull/10220 (Please see ExpandModelName function in model_tests.cc for more details)
+  # made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu__* or *xxx__*.
   list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
 endif ()

From 42dc63cfe237eb1e0f63a7501d163bae2e766f7c Mon Sep 17 00:00:00 2001
From: Chi Lo
Date: Mon, 31 Jan 2022 18:46:23 +0000
Subject: [PATCH 25/25] add comment

---
 onnxruntime/test/providers/provider_test_utils.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/onnxruntime/test/providers/provider_test_utils.cc b/onnxruntime/test/providers/provider_test_utils.cc
index badc81529fcd7..29148bcf8e68e 100644
--- a/onnxruntime/test/providers/provider_test_utils.cc
+++ b/onnxruntime/test/providers/provider_test_utils.cc
@@ -1176,6 +1176,8 @@ void OpTester::Run(
       }
 
 #ifdef USE_TENSORRT
+      // We are allowing tests to be run with only TensorRT EP, but TensorRT EP may not support all tests and may be in excluded providers list.
+      // So, "no registered EPs were able to run the model" is okay for this situation.
       ORT_UNUSED_PARAMETER(has_run);
 #else
       EXPECT_TRUE(has_run)