Skip to content

Commit

Permalink
Reduce test time for TensorRT EP CI (#10408)
Browse files Browse the repository at this point in the history
* expand model tests name

* skip cpu/cuda for trt when running onnxruntime_test_all

* only run trt ep for c++ unit test

* Update CMAKE_CUDA_ARCHITECTURES for T4

* Use new t4 agent pool

* Update YAML for run T4 on Windows

* revert code

* Update CMAKE_CUDA_ARCHITECTURES

* fix wrong value

* Remove cpu/cuda directly in model tests

* add only CMAKE_CUDA_ARCHITECTURES=75

* remove expanding model test name to see difference

* revert code

* Add fallback execution provider for unit test

* Add fallback execution provider for unit test (cont)

* add conditional to add fallback cuda ep

* Reduction op takes much longer for TRT 8.2, so we test a smaller range of inputs

* use M60

* revert code

* revert code

* add comments

* Modify code and add comment

* modify comment

* update comment

* add comment
  • Loading branch information
chilo-ms authored Feb 1, 2022
1 parent ef7b4dc commit a7c6786
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 6 deletions.
16 changes: 14 additions & 2 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ endif()

set(disabled_warnings)
function(AddTest)
cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS" ${ARGN})
cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS;TEST_ARGS" ${ARGN})
list(REMOVE_DUPLICATES _UT_SOURCES)

if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
Expand Down Expand Up @@ -96,7 +96,7 @@ function(AddTest)
target_compile_options(${_UT_TARGET} PRIVATE "-Wno-error=uninitialized")
endif()

set(TEST_ARGS)
set(TEST_ARGS ${_UT_TEST_ARGS})
if (onnxruntime_GENERATE_TEST_REPORTS)
# generate a report file next to the test program
if (onnxruntime_BUILD_WEBASSEMBLY)
Expand Down Expand Up @@ -685,13 +685,25 @@ if (onnxruntime_BUILD_WEBASSEMBLY)
endif()
endif()

set(test_all_args)
if (onnxruntime_USE_TENSORRT)
# TRT EP CI takes much longer time when updating to TRT 8.2
# So, we only run trt ep and exclude other eps to reduce CI test time.
#
# The test names of model tests were using sequential number in the past.
# This PR https://github.com/microsoft/onnxruntime/pull/10220 (please see the ExpandModelName function in model_tests.cc for more details)
# made test names contain the "ep" and "model path" information, so we can easily filter out tests that use the cuda ep or other eps with *cpu__* or *xxx__*.
list(APPEND test_all_args "--gtest_filter=-*cpu__*:*cuda__*" )
endif ()

AddTest(
TARGET onnxruntime_test_all
SOURCES ${all_tests} ${onnxruntime_unittest_main_src}
LIBS
onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
onnx_test_data_proto nlohmann_json::nlohmann_json
DEPENDS ${all_dependencies}
TEST_ARGS ${test_all_args}
)
if (MSVC)
# The warning means the type of two integral values around a binary operator is narrow than their result.
Expand Down
28 changes: 24 additions & 4 deletions onnxruntime/test/providers/cpu/reduction/reduction_ops_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1557,9 +1557,15 @@ void test_apex_reduce_sum(
}

TEST(ReductionOpTest, ReduceSum_apex_matrix_large) {
#ifdef USE_TENSORRT
// Reduction op takes much longer for TRT 8.2, so we test a smaller range of inputs.
int64_t threshold = 4096;
#else
int64_t threshold = 32768;
#endif
for (int64_t m = 1; m < 2049; m *= 8) {
for (int64_t n = 2; n < 2049; n *= 8) {
if (m * n > 32768) {
if (m * n > threshold) {
continue;
}
test_apex_reduce_sum(m, n);
Expand Down Expand Up @@ -1587,7 +1593,13 @@ TEST(ReductionOpTest, ReduceSum_batch_by_two) {
}

TEST(ReductionOpTest, ReduceSum_batch_by_seq_by_128) {
for (int i = 1; i < 16; i += 1) {
#ifdef USE_TENSORRT
// Reduction op takes much longer for TRT 8.2, so we test a smaller range of inputs.
int i_max = 8;
#else
int i_max = 16;
#endif
for (int i = 1; i < i_max; i += 1) {
test_apex_reduce_sum(i * 128, 128);
test_apex_reduce_sum(i * 512, 128);
test_apex_reduce_sum(i * 128, 768);
Expand Down Expand Up @@ -1616,8 +1628,16 @@ TEST(ReductionOpTest, ReduceSum_bert_selected_batch_size) {

TEST(ReductionOpTest, ReduceSum_apex_more) {
std::srand(0);
for (int64_t m = 1; m < 16; ++m) {
for (int64_t n = 1; n < 16; ++n) {
#ifdef USE_TENSORRT
// Reduction op takes much longer for TRT 8.2, so we test a smaller range of inputs.
int64_t m_max = 8;
int64_t n_max = 8;
#else
int64_t m_max = 16;
int64_t n_max = 16;
#endif
for (int64_t m = 1; m < m_max; ++m) {
for (int64_t n = 1; n < n_max; ++n) {
const auto m_ = 2 * m;
const auto n_ = 2 * n;
test_apex_reduce_sum(m_, n_);
Expand Down
13 changes: 13 additions & 0 deletions onnxruntime/test/providers/provider_test_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,12 @@ void OpTester::Run(
std::vector<std::string> output_names;
FillFeedsAndOutputNames(feeds, output_names);
// Run the model
#ifdef USE_TENSORRT
// only run trt ep to reduce test time
static const std::string all_provider_types[] = {
kTensorrtExecutionProvider,
};
#else
static const std::string all_provider_types[] = {
kCpuExecutionProvider,
kCudaExecutionProvider,
Expand All @@ -1008,6 +1014,7 @@ void OpTester::Run(
kRocmExecutionProvider,
kCoreMLExecutionProvider,
};
#endif

bool has_run = false;

Expand Down Expand Up @@ -1168,8 +1175,14 @@ void OpTester::Run(
cur_provider = "not set";
}

#ifdef USE_TENSORRT
// We allow tests to be run with only the TensorRT EP, but the TensorRT EP may not support all tests and may be in the excluded-providers list.
// So, in this situation it is acceptable that no registered EPs were able to run the model.
ORT_UNUSED_PARAMETER(has_run);
#else
EXPECT_TRUE(has_run)
<< "No registered execution providers were able to run the model.";
#endif
}
}
ORT_CATCH(const std::exception& ex) {
Expand Down

0 comments on commit a7c6786

Please sign in to comment.