Revert DML pipeline changes (#23135)
### Description
We previously planned to add the DirectML EP to the existing onnxruntime Windows
CUDA packages. After careful consideration, we have decided to postpone that change.
This PR reverts some of the pipeline changes previously made by @mszhanyi and
@jchen351.
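
For context, most of the reverted test changes removed a runtime guard that the combined CUDA+DML packaging required: when both EPs were built into one package, `DefaultCudaExecutionProvider()` could return `nullptr` at runtime, so tests had to skip, bail out, or register providers conditionally instead of assuming the CUDA EP exists (the `SKIP_CUDA_TEST_WITH_DML` macro and the nullptr checks deleted below). The following is a minimal sketch of that guard pattern, assuming GoogleTest; the `IExecutionProvider` struct and the nullptr-returning factory are simplified stand-ins for onnxruntime's test helpers in `test/util/include/default_providers.h`, not the actual declarations.

```cpp
// Sketch of the runtime guard pattern this commit reverts (stand-alone example).
#include <gtest/gtest.h>

#include <memory>

// Stand-in for onnxruntime::IExecutionProvider.
struct IExecutionProvider {};

// Stand-in for the test helper; in a combined CUDA+DML build it could return
// nullptr at runtime when the CUDA EP was unavailable.
std::unique_ptr<IExecutionProvider> DefaultCudaExecutionProvider() {
  return nullptr;  // simulate a process where the CUDA EP is not registered
}

TEST(GuardPatternSketch, SkipsWhenCudaUnavailable) {
  // This is the check the reverted SKIP_CUDA_TEST_WITH_DML macro expanded to.
  if (DefaultCudaExecutionProvider() == nullptr) {
    GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled";
  }
  // CUDA-specific assertions would run here.
}
```

With the revert, CUDA builds ship without the DML EP again, so `DefaultCudaExecutionProvider()` is reliably non-null in CUDA test runs and the guards can be deleted wholesale, as the diffs below show.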
snnn authored Dec 18, 2024
1 parent e76bd2f commit 5d7030e
Showing 43 changed files with 94 additions and 456 deletions.
1 change: 0 additions & 1 deletion java/src/test/java/ai/onnxruntime/InferenceTest.java
@@ -737,7 +737,6 @@ public void testCoreML() throws OrtException {
     runProvider(OrtProvider.CORE_ML);
   }
 
-  @Disabled("DirectML Java API hasn't been supported yet")
   @Test
   @EnabledIfSystemProperty(named = "USE_DML", matches = "1")
   public void testDirectML() throws OrtException {
3 changes: 0 additions & 3 deletions java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java
@@ -27,15 +27,13 @@
 import java.util.HashMap;
 import java.util.Map;
 import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
 import org.junit.jupiter.api.condition.EnabledIfSystemProperty;
 
 public class ProviderOptionsTest {
   private static final OrtEnvironment env = TestHelpers.getOrtEnvironment();
 
   @Test
   @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1")
-  @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
   public void testCUDAOptions() throws OrtException {
     // Test standard options
     OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0);
@@ -63,7 +61,6 @@ public void testCUDAOptions() throws OrtException {
 
   @Test
   @EnabledIfSystemProperty(named = "USE_TENSORRT", matches = "1")
-  @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
   public void testTensorRT() throws OrtException {
     // Test standard options
     OrtTensorRTProviderOptions rtOpts = new OrtTensorRTProviderOptions(0);
9 changes: 0 additions & 9 deletions onnxruntime/test/common/cuda_op_test_utils.h
@@ -5,11 +5,6 @@
 
 #include "test/util/include/default_providers.h"
 
-#define SKIP_CUDA_TEST_WITH_DML                                          \
-  if (DefaultCudaExecutionProvider() == nullptr) {                       \
-    GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \
-  }
-
 namespace onnxruntime {
 namespace test {
 
@@ -18,10 +13,6 @@ namespace test {
 int GetCudaArchitecture();
 
 inline bool HasCudaEnvironment(int min_cuda_architecture) {
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return false;
-  }
-
   if (DefaultCudaExecutionProvider().get() == nullptr) {
     return false;
   }
6 changes: 0 additions & 6 deletions onnxruntime/test/contrib_ops/beam_search_test.cc
@@ -75,9 +75,6 @@ TEST(BeamSearchTest, GptBeamSearchFp32) {
   const char* const output_names[] = {"sequences"};
 
   Ort::SessionOptions session_options;
-#if defined(USE_CUDA) && defined(USE_DML)
-  SKIP_CUDA_TEST_WITH_DML;
-#endif
 #ifdef USE_CUDA
   OrtCUDAProviderOptionsV2 cuda_options;
   cuda_options.use_tf32 = false;
@@ -171,9 +168,6 @@ TEST(BeamSearchTest, GptBeamSearchFp16) {
   bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get());
   if (enable_cuda || enable_rocm) {
     Ort::SessionOptions session_options;
-#if defined(USE_CUDA) && defined(USE_DML)
-    SKIP_CUDA_TEST_WITH_DML;
-#endif
 #ifdef USE_CUDA
     OrtCUDAProviderOptionsV2 cuda_options;
     cuda_options.use_tf32 = false;
3 changes: 0 additions & 3 deletions onnxruntime/test/contrib_ops/bias_dropout_op_test.cc
@@ -181,9 +181,6 @@ void RunBiasDropoutTest(const bool use_mask, const std::vector<int64_t>& input_s
   t.SetCustomOutputVerifier(output_verifier);
   std::vector<std::unique_ptr<IExecutionProvider>> t_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return;
-  }
   t_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   t_eps.emplace_back(DefaultRocmExecutionProvider());
7 changes: 1 addition & 6 deletions onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc
@@ -61,9 +61,7 @@ void RunTestForInference(const std::vector<int64_t>& input_dims, bool has_ratio
 
   std::vector<std::unique_ptr<IExecutionProvider>> test_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    test_eps.emplace_back(DefaultCudaExecutionProvider());
-  }
+  test_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   test_eps.emplace_back(DefaultRocmExecutionProvider());
 #endif
@@ -124,9 +122,6 @@ void RunTestForTraining(const std::vector<int64_t>& input_dims) {
 
   std::vector<std::unique_ptr<IExecutionProvider>> dropout_eps;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    return;
-  }
   dropout_eps.emplace_back(DefaultCudaExecutionProvider());
 #elif USE_ROCM
   dropout_eps.emplace_back(DefaultRocmExecutionProvider());
13 changes: 3 additions & 10 deletions onnxruntime/test/contrib_ops/layer_norm_test.cc
@@ -2,7 +2,6 @@
 // Licensed under the MIT License.
 
 #include "test/providers/compare_provider_test_utils.h"
-#include "test/util/include/default_providers.h"
 
 namespace onnxruntime {
 namespace test {
@@ -80,20 +79,14 @@ static void TestLayerNorm(const std::vector<int64_t>& x_dims,
 #endif
 
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    test.CompareWithCPU(kCudaExecutionProvider);
-  }
+  test.CompareWithCPU(kCudaExecutionProvider);
 #elif USE_ROCM
   test.CompareWithCPU(kRocmExecutionProvider);
 #elif USE_DML
   test.CompareWithCPU(kDmlExecutionProvider);
 #elif USE_WEBGPU
   test.CompareWithCPU(kWebGpuExecutionProvider);
 #endif
-
-#ifdef USE_DML
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    test.CompareWithCPU(kDmlExecutionProvider);
-  }
-#endif
 }
 
 TEST(CudaKernelTest, LayerNorm_NullInput) {
28 changes: 8 additions & 20 deletions onnxruntime/test/contrib_ops/matmul_4bits_test.cc
@@ -490,17 +490,13 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
   if (use_float16) {
 #ifdef USE_CUDA
-    if (DefaultCudaExecutionProvider() != nullptr) {
-      execution_providers.push_back(DefaultCudaExecutionProvider());
-    }
+    execution_providers.push_back(DefaultCudaExecutionProvider());
 #endif
 #ifdef USE_ROCM
     execution_providers.push_back(DefaultRocmExecutionProvider());
 #endif
 #ifdef USE_DML
-    if (DefaultDmlExecutionProvider() != nullptr) {
-      execution_providers.push_back(DefaultDmlExecutionProvider());
-    }
+    execution_providers.push_back(DefaultDmlExecutionProvider());
 #endif
 #ifdef USE_WEBGPU
     execution_providers.push_back(DefaultWebGpuExecutionProvider());
@@ -518,11 +514,8 @@
 }  // namespace
 
 TEST(MatMulNBits, Float16Cuda) {
-#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
-  std::vector<bool> has_gidx_options = {true, false};
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    has_gidx_options.assign(1, false);
-  }
+#if defined(USE_CUDA) || defined(USE_ROCM)
+  auto has_gidx_options = {true, false};
 #else
   auto has_gidx_options = {false};
 #endif
@@ -533,9 +526,7 @@
   for (auto block_size : {16, 32, 64, 128}) {
     for (auto has_gidx : has_gidx_options) {
 #ifdef USE_DML
-      if (DefaultDmlExecutionProvider() != nullptr) {
-        RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
-      }
+      RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
 #else
       RunTest(M, N, K, block_size, 0, false, true, has_gidx);
       RunTest(M, N, K, block_size, 0, true, true, has_gidx, false);
@@ -548,16 +539,12 @@
 }
 
 TEST(MatMulNBits, Float16Large) {
-#if defined(USE_CUDA) || defined(USE_DML)
+#ifdef USE_DML
   // For some reason, the A10 machine that runs these tests during CI has a much bigger error than all retail
   // machines we tested on. All consumer-grade machines from Nvidia/AMD/Intel seem to pass these tests with an
   // absolute error of 0.08, but the A10 has errors going as high as 0.22. Ultimately, given the large number
   // of elements in this test, ULPs should probably be used instead of absolute/relative tolerances.
-  float abs_error = 0.05f;
-  if (DefaultDmlExecutionProvider() != nullptr) {
-    // it means the ep is dml in runtime, the abs_error is changed to 0.3f
-    abs_error = 0.3f;
-  }
+  float abs_error = 0.3f;
 #elif USE_WEBGPU
   // See Intel A770 to pass these tests with an absolute error of 0.08.
   float abs_error = 0.08f;
@@ -573,6 +560,7 @@
     }
   }
 }
+
 #endif  // defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
 }  // namespace test
 }  // namespace onnxruntime
2 changes: 1 addition & 1 deletion onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc
@@ -227,7 +227,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) {
 }
 
 // DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matric for Float32 output
-#if defined(USE_DML) && !defined(USE_CUDA)
+#if defined(USE_DML)
 
 TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) {
   RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, false>();
20 changes: 1 addition & 19 deletions onnxruntime/test/contrib_ops/tensor_op_test.cc
@@ -121,15 +121,7 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normalize_variance) {
   test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
   test.AddInput<float>("input", {N, C, H, W}, X);
   test.AddOutput<float>("output", {N, C, H, W}, result);
-#if defined(USE_CUDA) && defined(USE_DML)
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
-  } else if (DefaultDmlExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
-  }
-#else
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
-#endif
 }
 
 void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
@@ -196,15 +188,7 @@ void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
   test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
   test.AddInput<float>("input", {N, C, H, W}, X);
   test.AddOutput<float>("output", {N, C, H, W}, result);
-#if defined(USE_CUDA) && defined(USE_DML)
-  if (DefaultCudaExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
-  } else if (DefaultDmlExecutionProvider() == nullptr) {
-    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
-  }
-#else
   test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
-#endif
 }
 
 TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) {
@@ -246,9 +230,7 @@ TEST(UnfoldTensorOpTest, LastDim) {
 
   std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
 #ifdef USE_CUDA
-  if (DefaultCudaExecutionProvider() != nullptr) {
-    execution_providers.push_back(DefaultCudaExecutionProvider());
-  }
+  execution_providers.push_back(DefaultCudaExecutionProvider());
 #endif
   execution_providers.push_back(DefaultCpuExecutionProvider());
   tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
[Diffs for the remaining 33 changed files did not load in this view.]
