Revert DML pipeline changes (#23135)
### Description
We previously planned to add the DirectML EP to the existing onnxruntime Windows
CUDA packages. After careful consideration, we have decided to postpone that change.
This PR reverts some of the pipeline changes previously made by @mszhanyi and
@jchen351.
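
For context, most of the reverted test changes removed a runtime guard that the combined CUDA+DML packaging required: when both EPs were built into one package, `DefaultCudaExecutionProvider()` could return `nullptr` at runtime, so tests had to skip, bail out, or register providers conditionally instead of assuming the CUDA EP exists (the `SKIP_CUDA_TEST_WITH_DML` macro and the nullptr checks deleted below). The following is a minimal sketch of that guard pattern, assuming GoogleTest; the `IExecutionProvider` struct and the nullptr-returning factory are simplified stand-ins for onnxruntime's test helpers in `test/util/include/default_providers.h`, not the actual declarations.

```cpp
// Sketch of the runtime guard pattern this commit reverts (stand-alone example).
#include <gtest/gtest.h>

#include <memory>

// Stand-in for onnxruntime::IExecutionProvider.
struct IExecutionProvider {};

// Stand-in for the test helper; in a combined CUDA+DML build it could return
// nullptr at runtime when the CUDA EP was unavailable.
std::unique_ptr<IExecutionProvider> DefaultCudaExecutionProvider() {
  return nullptr;  // simulate a process where the CUDA EP is not registered
}

TEST(GuardPatternSketch, SkipsWhenCudaUnavailable) {
  // This is the check the reverted SKIP_CUDA_TEST_WITH_DML macro expanded to.
  if (DefaultCudaExecutionProvider() == nullptr) {
    GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled";
  }
  // CUDA-specific assertions would run here.
}
```

With the revert, CUDA builds ship without the DML EP again, so `DefaultCudaExecutionProvider()` is reliably non-null in CUDA test runs and the guards can be deleted wholesale, as the diffs below show.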
snnn authored Dec 18, 2024
1 parent e76bd2f commit 5d7030e
Showing 43 changed files with 94 additions and 456 deletions.
1 change: 0 additions & 1 deletion java/src/test/java/ai/onnxruntime/InferenceTest.java
@@ -737,7 +737,6 @@ public void testCoreML() throws OrtException {
     runProvider(OrtProvider.CORE_ML);
   }
 
-  @Disabled("DirectML Java API hasn't been supported yet")
   @Test
   @EnabledIfSystemProperty(named = "USE_DML", matches = "1")
   public void testDirectML() throws OrtException {
3 changes: 0 additions & 3 deletions java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java
@@ -27,15 +27,13 @@
 import java.util.HashMap;
 import java.util.Map;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
 
 public class ProviderOptionsTest {
   private static final OrtEnvironment env = TestHelpers.getOrtEnvironment();
 
   @Test
   @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1")
-  @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
   public void testCUDAOptions() throws OrtException {
     // Test standard options
     OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0);
@@ -63,7 +61,6 @@ public void testCUDAOptions() throws OrtException {
 
   @Test
   @EnabledIfSystemProperty(named = "USE_TENSORRT", matches = "1")
-  @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
   public void testTensorRT() throws OrtException {
     // Test standard options
     OrtTensorRTProviderOptions rtOpts = new OrtTensorRTProviderOptions(0);
9 changes: 0 additions & 9 deletions onnxruntime/test/common/cuda_op_test_utils.h
@@ -5,11 +5,6 @@
 
 #include "test/util/include/default_providers.h"
 
-#define SKIP_CUDA_TEST_WITH_DML                                          \
-  if (DefaultCudaExecutionProvider() == nullptr) {                       \
-    GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \
-  }
-
 namespace onnxruntime {
 namespace test {
 
@@ -18,10 +13,6 @@ namespace test {
 int GetCudaArchitecture();
 
 inline bool HasCudaEnvironment(int min_cuda_architecture) {
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return false;
-  }
-
   if (DefaultCudaExecutionProvider().get() == nullptr) {
     return false;
   }
6 changes: 0 additions & 6 deletions onnxruntime/test/contrib_ops/beam_search_test.cc
@@ -75,9 +75,6 @@ TEST(BeamSearchTest, GptBeamSearchFp32) {
   const char* const output_names[] = {"sequences"};
 
   Ort::SessionOptions session_options;
-#if defined(USE_CUDA) && defined(USE_DML)
-  SKIP_CUDA_TEST_WITH_DML;
-#endif
 #ifdef USE_CUDA
   OrtCUDAProviderOptionsV2 cuda_options;
   cuda_options.use_tf32 = false;
@@ -171,9 +168,6 @@ TEST(BeamSearchTest, GptBeamSearchFp16) {
   bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get());
   if (enable_cuda || enable_rocm) {
     Ort::SessionOptions session_options;
-#if defined(USE_CUDA) && defined(USE_DML)
-    SKIP_CUDA_TEST_WITH_DML;
-#endif
 #ifdef USE_CUDA
     OrtCUDAProviderOptionsV2 cuda_options;
     cuda_options.use_tf32 = false;
3 changes: 0 additions & 3 deletions onnxruntime/test/contrib_ops/bias_dropout_op_test.cc
@@ -181,9 +181,6 @@ void RunBiasDropoutTest(const bool use_mask, const std::vector<int64_t>& input_s
   t.SetCustomOutputVerifier(output_verifier);
   std::vector<std::unique_ptr<IExecutionProvider>> t_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return;
-  }
   t_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   t_eps.emplace_back(DefaultRocmExecutionProvider());
7 changes: 1 addition & 6 deletions onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc
@@ -61,9 +61,7 @@ void RunTestForInference(const std::vector<int64_t>& input_dims, bool has_ratio
 
   std::vector<std::unique_ptr<IExecutionProvider>> test_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    test_eps.emplace_back(DefaultCudaExecutionProvider());
-  }
+  test_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   test_eps.emplace_back(DefaultRocmExecutionProvider());
 #endif
@@ -124,9 +122,6 @@ void RunTestForTraining(const std::vector<int64_t>& input_dims) {
 
   std::vector<std::unique_ptr<IExecutionProvider>> dropout_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return;
-  }
   dropout_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   dropout_eps.emplace_back(DefaultRocmExecutionProvider());
13 changes: 3 additions & 10 deletions onnxruntime/test/contrib_ops/layer_norm_test.cc
@@ -2,7 +2,6 @@
 // Licensed under the MIT License.
 
 #include "test/providers/compare_provider_test_utils.h"
-#include "test/util/include/default_providers.h"
 
 namespace onnxruntime {
 namespace test {
@@ -80,20 +79,14 @@ static void TestLayerNorm(const std::vector<int64_t>& x_dims,
 #endif
 
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    test.CompareWithCPU(kCudaExecutionProvider);
-  }
+  test.CompareWithCPU(kCudaExecutionProvider);
 #elif USE_ROCM
   test.CompareWithCPU(kRocmExecutionProvider);
 #elif USE_DML
   test.CompareWithCPU(kDmlExecutionProvider);
 #elif USE_WEBGPU
   test.CompareWithCPU(kWebGpuExecutionProvider);
 #endif
-
-#ifdef USE_DML
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    test.CompareWithCPU(kDmlExecutionProvider);
-  }
-#endif
 }
 
 TEST(CudaKernelTest, LayerNorm_NullInput) {
28 changes: 8 additions & 20 deletions onnxruntime/test/contrib_ops/matmul_4bits_test.cc
@@ -490,17 +490,13 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
   if (use_float16) {
 #ifdef USE_CUDA
-    if (DefaultCudaExecutionProvider() != nullptr) {
-      execution_providers.push_back(DefaultCudaExecutionProvider());
-    }
+    execution_providers.push_back(DefaultCudaExecutionProvider());
 #endif
 #ifdef USE_ROCM
     execution_providers.push_back(DefaultRocmExecutionProvider());
 #endif
 #ifdef USE_DML
-    if (DefaultDmlExecutionProvider() != nullptr) {
-      execution_providers.push_back(DefaultDmlExecutionProvider());
-    }
+    execution_providers.push_back(DefaultDmlExecutionProvider());
 #endif
 #ifdef USE_WEBGPU
     execution_providers.push_back(DefaultWebGpuExecutionProvider());
@@ -518,11 +514,8 @@
 }  // namespace
 
 TEST(MatMulNBits, Float16Cuda) {
-#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
-  std::vector<bool> has_gidx_options = {true, false};
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    has_gidx_options.assign(1, false);
-  }
+#if defined(USE_CUDA) || defined(USE_ROCM)
+  auto has_gidx_options = {true, false};
 #else
   auto has_gidx_options = {false};
 #endif
@@ -533,9 +526,7 @@
   for (auto block_size : {16, 32, 64, 128}) {
     for (auto has_gidx : has_gidx_options) {
 #ifdef USE_DML
-      if (DefaultDmlExecutionProvider() != nullptr) {
-        RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
-      }
+      RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
 #else
       RunTest(M, N, K, block_size, 0, false, true, has_gidx);
       RunTest(M, N, K, block_size, 0, true, true, has_gidx, false);
@@ -548,16 +539,12 @@
 }
 
 TEST(MatMulNBits, Float16Large) {
-#if defined(USE_CUDA) || defined(USE_DML)
+#ifdef USE_DML
   // For some reason, the A10 machine that runs these tests during CI has a much bigger error than all retail
   // machines we tested on. All consumer-grade machines from Nvidia/AMD/Intel seem to pass these tests with an
   // absolute error of 0.08, but the A10 has errors going as high as 0.22. Ultimately, given the large number
   // of elements in this test, ULPs should probably be used instead of absolute/relative tolerances.
-  float abs_error = 0.05f;
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    // it means the ep is dml in runtime, the abs_error is changed to 0.3f
-    abs_error = 0.3f;
-  }
+  float abs_error = 0.3f;
 #elif USE_WEBGPU
   // See Intel A770 to pass these tests with an absolute error of 0.08.
   float abs_error = 0.08f;
@@ -573,6 +560,7 @@
     }
   }
 }
+
 #endif  // defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
 }  // namespace test
 }  // namespace onnxruntime
2 changes: 1 addition & 1 deletion onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -227,7 +227,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) {
 }
 
 // DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matric for Float32 output
-#if defined(USE_DML) && !defined(USE_CUDA)
+#if defined(USE_DML)
 
 TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) {
   RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, false>();
20 changes: 1 addition & 19 deletions onnxruntime/test/contrib_ops/tensor_op_test.cc
@@ -121,15 +121,7 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normalize_variance) {
   test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
   test.AddInput<float>("input", {N, C, H, W}, X);
   test.AddOutput<float>("output", {N, C, H, W}, result);
-#if defined(USE_CUDA) && defined(USE_DML)
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
-  } else if (DefaultDmlExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
-  }
-#else
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
-#endif
 }
 
 void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
@@ -196,15 +188,7 @@ void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
   test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
   test.AddInput<float>("input", {N, C, H, W}, X);
   test.AddOutput<float>("output", {N, C, H, W}, result);
-#if defined(USE_CUDA) && defined(USE_DML)
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
-  } else if (DefaultDmlExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
-  }
-#else
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
-#endif
 }
 
 TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) {
@@ -246,9 +230,7 @@ TEST(UnfoldTensorOpTest, LastDim) {
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    execution_providers.push_back(DefaultCudaExecutionProvider());
-  }
+  execution_providers.push_back(DefaultCudaExecutionProvider());
 #endif
   execution_providers.push_back(DefaultCpuExecutionProvider());
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
[Diffs for the remaining 33 changed files did not load in this view.]
