From a7d663429d90900c8d800a4061155c7af19905dc Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 25 Nov 2024 10:50:52 +0800 Subject: [PATCH] Build DML in Windows GPU CI pipeline (#22869) ### Description Add a new stage to build cuda and dml in Windows GPU CI pipeline (PR checks) to prevent regressions introduced by new cuda tests. Rename all tests under cuda/test_cases with a CudaEp prefix so they can be skipped easily. ### Motivation and Context 1. CudaNhwcEP is added by default when using the CUDA EP. 2. If onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS is enabled, the tests in test/providers/cuda/test_cases are added too. ### To do Add --enable_pybind to the new stage. Currently, --enable_pybind triggers some Python tests, such as onnxruntime_test_python.py, which uses the get_available_providers() API. More discussion is needed to decide how to make it work. --- .../providers/ProviderOptionsTest.java | 3 ++ .../test/providers/cuda/cuda_provider_test.cc | 2 +- .../cuda/test_cases/allocator_cuda_test.cc | 4 +-- .../attention_kernel_options_test.cc | 6 ++-- .../cuda/test_cases/beam_search_topk.cc | 2 +- .../test_cases/blkq4_fp16_gemm_sm80_test.cc | 10 +++---- .../cuda_execution_provider_test.cc | 4 +-- .../cuda/test_cases/cuda_utils_test.cc | 2 +- .../cuda/test_cases/gemm_options_test.cc | 12 ++++---- .../cuda/test_cases/greedy_search_top_one.cc | 2 +- .../test_cases/reduction_functions_test.cc | 12 ++++---- .../templates/jobs/win-ci-vs-2022-job.yml | 28 +++++++++++++++---- .../azure-pipelines/templates/win-ci.yml | 2 +- .../win-gpu-cuda-ci-pipeline.yml | 26 ++++++++++++++++- .../win-gpu-dml-ci-pipeline.yml | 4 +-- 15 files changed, 81 insertions(+), 38 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java index 57c4eb3577fd0..fa0b6fd0ef9d9 100644 --- a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java +++ 
b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java @@ -27,6 +27,7 @@ import java.util.HashMap; import java.util.Map; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.DisabledIfSystemProperty; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; public class ProviderOptionsTest { @@ -34,6 +35,7 @@ public class ProviderOptionsTest { @Test @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1") + @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1") public void testCUDAOptions() throws OrtException { // Test standard options OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0); @@ -61,6 +63,7 @@ public void testCUDAOptions() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_TENSORRT", matches = "1") + @DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1") public void testTensorRT() throws OrtException { // Test standard options OrtTensorRTProviderOptions rtOpts = new OrtTensorRTProviderOptions(0); diff --git a/onnxruntime/test/providers/cuda/cuda_provider_test.cc b/onnxruntime/test/providers/cuda/cuda_provider_test.cc index e57cdd2350fab..e745e1bcb8171 100644 --- a/onnxruntime/test/providers/cuda/cuda_provider_test.cc +++ b/onnxruntime/test/providers/cuda/cuda_provider_test.cc @@ -11,7 +11,7 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA_Test(); namespace test { namespace cuda { -TEST(CUDA_EP_Unittest, All) { +TEST(CudaEpUnittest, All) { onnxruntime::ProviderInfo_CUDA& ep = onnxruntime::GetProviderInfo_CUDA_Test(); ep.TestAll(); } diff --git a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc index b413d04fe81e8..ec7c6ec4e1605 100644 --- a/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/allocator_cuda_test.cc @@ -11,7 +11,7 @@ namespace onnxruntime { namespace test { -TEST(AllocatorTest, CUDAAllocatorTest) { 
+TEST(CudaEpAllocatorTest, CUDAAllocatorTest) { OrtDevice::DeviceId cuda_device_id = 0; // ensure CUDA device is available. @@ -77,7 +77,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) { } // test that we fallback to smaller allocations if the growth of the arena exceeds the available memory -TEST(AllocatorTest, CUDAAllocatorFallbackTest) { +TEST(CudaEpAllocatorTest, CUDAAllocatorFallbackTest) { OrtDevice::DeviceId cuda_device_id = 0; size_t free = 0; diff --git a/onnxruntime/test/providers/cuda/test_cases/attention_kernel_options_test.cc b/onnxruntime/test/providers/cuda/test_cases/attention_kernel_options_test.cc index b2e986f680763..ccdc56de5937d 100644 --- a/onnxruntime/test/providers/cuda/test_cases/attention_kernel_options_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/attention_kernel_options_test.cc @@ -17,7 +17,7 @@ using onnxruntime::contrib::attention::AttentionBackend; namespace onnxruntime { namespace test { -TEST(AttentionKernelOptionsTest, NonZeroValue) { +TEST(CudaEpAttentionKernelOptionsTest, NonZeroValue) { { AttentionKernelOptions options; int value = static_cast(AttentionBackend::FLASH_ATTENTION) | static_cast(AttentionBackend::EFFICIENT_ATTENTION); @@ -156,7 +156,7 @@ TEST(AttentionKernelOptionsTest, NonZeroValue) { } // Test all environment variables take effect when option value is 0. -TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) { +TEST(CudaEpAttentionKernelOptionsTest, DefaultOptionWithEnvVar) { constexpr int value = 0; ScopedEnvironmentVariables scoped_env_vars{ EnvVarMap{ @@ -186,7 +186,7 @@ TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) { } // Test default min sequence lengths when environment variables are not set. 
-TEST(AttentionKernelOptionsTest, DefaultMinSeqLens) { +TEST(CudaEpAttentionKernelOptionsTest, DefaultMinSeqLens) { constexpr int value = 0; ScopedEnvironmentVariables scoped_env_vars{ EnvVarMap{ diff --git a/onnxruntime/test/providers/cuda/test_cases/beam_search_topk.cc b/onnxruntime/test/providers/cuda/test_cases/beam_search_topk.cc index a0d115c41c14b..97d50398a5550 100644 --- a/onnxruntime/test/providers/cuda/test_cases/beam_search_topk.cc +++ b/onnxruntime/test/providers/cuda/test_cases/beam_search_topk.cc @@ -68,7 +68,7 @@ void ComputeTopKReference(const std::vector& values, } } -TEST(TestBeamSearch, TopK) { +TEST(CudaEpTestBeamSearch, TopK) { int32_t batch_size = 4; int32_t beam_size = 4; int32_t vocab_size = 50257; diff --git a/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_gemm_sm80_test.cc b/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_gemm_sm80_test.cc index 3fcb9045ee7e6..d8fb3c8256012 100644 --- a/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_gemm_sm80_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/blkq4_fp16_gemm_sm80_test.cc @@ -230,7 +230,7 @@ void testPrepack(int rows, int columns) { } // TODO: code runs on CPU, but this is for sm80 only, maybe enable only when test on sm80 -TEST(BlkQ4_GEMM, PrepackSm80Test) { +TEST(CudaEpBlkQ4_GEMM, PrepackSm80Test) { Status status = onnxruntime::cuda::test::sm80_supported(); if (!status.IsOK()) { // skip the test if sm80 is not supported @@ -263,7 +263,7 @@ TEST(BlkQ4_GEMM, PrepackSm80Test) { testPrepack(256, 256); } -TEST(BlkQ4_GEMM, Sm80RowBlockingTest) { +TEST(CudaEpBlkQ4_GEMM, Sm80RowBlockingTest) { Status status = onnxruntime::cuda::test::sm80_supported(); if (!status.IsOK()) { // skip the test if sm80 is not supported @@ -292,7 +292,7 @@ TEST(BlkQ4_GEMM, Sm80RowBlockingTest) { onnxruntime::cuda::test::run_blkq4_gemm<64, false, false, true>(256, 1024, 576); } -TEST(BlkQ4_GEMM, Sm80ColBlockingTest) { +TEST(CudaEpBlkQ4_GEMM, Sm80ColBlockingTest) { Status status = 
onnxruntime::cuda::test::sm80_supported(); if (!status.IsOK()) { // skip the test if sm80 is not supported @@ -305,7 +305,7 @@ TEST(BlkQ4_GEMM, Sm80ColBlockingTest) { onnxruntime::cuda::test::run_blkq4_gemm<64, true, false, true>(256, 1024, 576); } -TEST(BlkQ4_GEMM, Sm80SmallMTest) { +TEST(CudaEpBlkQ4_GEMM, Sm80SmallMTest) { Status status = onnxruntime::cuda::test::sm80_supported(); if (!status.IsOK()) { // skip the test if sm80 is not supported @@ -326,7 +326,7 @@ TEST(BlkQ4_GEMM, Sm80SmallMTest) { onnxruntime::cuda::test::run_blkq4_gemm<64, true, true, true>(16, 1024, 576); } -TEST(BlkQ4_GEMM, Sm80SmallTileKernelTest) { +TEST(CudaEpBlkQ4_GEMM, Sm80SmallTileKernelTest) { Status status = onnxruntime::cuda::test::sm80_supported(); if (!status.IsOK()) { // skip the test if sm80 is not supported diff --git a/onnxruntime/test/providers/cuda/test_cases/cuda_execution_provider_test.cc b/onnxruntime/test/providers/cuda/test_cases/cuda_execution_provider_test.cc index 72357ec7e02d2..f3222c6f683b5 100644 --- a/onnxruntime/test/providers/cuda/test_cases/cuda_execution_provider_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/cuda_execution_provider_test.cc @@ -19,7 +19,7 @@ namespace cuda { namespace test { // TODO: Since the "DeferredRelease" has been migrated to CudaStream class, // we should migrate this test from CudaEP unit test to CudaStream unit test. -TEST(TestDeferredRelease, WithArena) { +TEST(CudaEpTestDeferredRelease, WithArena) { // Create CUDA EP. CUDAExecutionProviderInfo info; CUDAExecutionProvider ep(info); @@ -52,7 +52,7 @@ TEST(TestDeferredRelease, WithArena) { ORT_THROW_IF_ERROR(ep.OnRunEnd(true, run_opts)); } -TEST(TestDeferredRelease, WithoutArena) { +TEST(CudaEpTestDeferredRelease, WithoutArena) { // Create CUDA EP. 
CUDAExecutionProviderInfo info; CUDAExecutionProvider ep(info); diff --git a/onnxruntime/test/providers/cuda/test_cases/cuda_utils_test.cc b/onnxruntime/test/providers/cuda/test_cases/cuda_utils_test.cc index 7468a5718425e..3538c7add94d0 100644 --- a/onnxruntime/test/providers/cuda/test_cases/cuda_utils_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/cuda_utils_test.cc @@ -40,7 +40,7 @@ void TestFillCorrectness(size_t num_elements, TElement value) { } } // namespace -TEST(CudaUtilsTest, FillCorrectness) { +TEST(CudaEpUnittest, FillCorrectness) { TestFillCorrectness(1 << 20, 1); TestFillCorrectness(1 << 20, 2); TestFillCorrectness(1 << 20, 3); diff --git a/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc b/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc index 6636e15040393..518fde5804b23 100644 --- a/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc @@ -10,7 +10,7 @@ namespace onnxruntime { namespace cuda { namespace test { -TEST(CudaGemmOptions, TestDefaultOptions) { +TEST(CudaEpGemmOptions, TestDefaultOptions) { HalfGemmOptions gemm_options; ASSERT_FALSE(gemm_options.IsCompute16F()); #if defined(USE_CUDA) @@ -22,7 +22,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) { #endif } -TEST(CudaGemmOptions, TestCompute16F) { +TEST(CudaEpGemmOptions, TestCompute16F) { HalfGemmOptions gemm_options; gemm_options.Initialize(1); ASSERT_TRUE(gemm_options.IsCompute16F()); @@ -35,7 +35,7 @@ TEST(CudaGemmOptions, TestCompute16F) { #endif } -TEST(CudaGemmOptions, NoReducedPrecision) { +TEST(CudaEpGemmOptions, NoReducedPrecision) { HalfGemmOptions gemm_options; gemm_options.Initialize(2); ASSERT_FALSE(gemm_options.IsCompute16F()); @@ -48,7 +48,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) { #endif } -TEST(CudaGemmOptions, Pedantic) { +TEST(CudaEpGemmOptions, Pedantic) { HalfGemmOptions gemm_options; gemm_options.Initialize(4); 
ASSERT_FALSE(gemm_options.IsCompute16F()); @@ -61,7 +61,7 @@ TEST(CudaGemmOptions, Pedantic) { #endif } -TEST(CudaGemmOptions, Compute16F_Pedantic) { +TEST(CudaEpGemmOptions, Compute16F_Pedantic) { HalfGemmOptions gemm_options; gemm_options.Initialize(5); ASSERT_TRUE(gemm_options.IsCompute16F()); @@ -74,7 +74,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) { #endif } -TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) { +TEST(CudaEpGemmOptions, Compute16F_NoReducedPrecision) { HalfGemmOptions gemm_options; gemm_options.Initialize(3); ASSERT_TRUE(gemm_options.IsCompute16F()); diff --git a/onnxruntime/test/providers/cuda/test_cases/greedy_search_top_one.cc b/onnxruntime/test/providers/cuda/test_cases/greedy_search_top_one.cc index 6b8cd68de0fca..ba24cf858e80f 100644 --- a/onnxruntime/test/providers/cuda/test_cases/greedy_search_top_one.cc +++ b/onnxruntime/test/providers/cuda/test_cases/greedy_search_top_one.cc @@ -41,7 +41,7 @@ void ComputeTop1Reference(const std::vector& values, } } -TEST(TestGreedySearch, TopOne) { +TEST(CudaEpTestGreedySearch, TopOne) { int32_t batch_size = 4; int32_t vocab_size = 50257; int32_t batch_x_vocab = batch_size * vocab_size; diff --git a/onnxruntime/test/providers/cuda/test_cases/reduction_functions_test.cc b/onnxruntime/test/providers/cuda/test_cases/reduction_functions_test.cc index ec7e98528504e..09c9c1e5f8f6a 100644 --- a/onnxruntime/test/providers/cuda/test_cases/reduction_functions_test.cc +++ b/onnxruntime/test/providers/cuda/test_cases/reduction_functions_test.cc @@ -179,7 +179,7 @@ void TestReduceColumnsToColumn(int m, int n, float relative_error_tolerance = 1e } } // namespace -TEST(ReductionFunctionsTest, ReduceRowToScalar) { +TEST(CudaEpReductionFunctionsTest, ReduceRowToScalar) { TestReduceRowToScalarApis(3); TestReduceRowToScalarApis(19); TestReduceRowToScalarApis(123); @@ -188,7 +188,7 @@ TEST(ReductionFunctionsTest, ReduceRowToScalar) { TestReduceRowToScalarApis(941736, 2e-4f); } -TEST(ReductionFunctionsTest, 
ReduceRowsToRow) { +TEST(CudaEpReductionFunctionsTest, ReduceRowsToRow) { for (int m : {3, 193, 2945}) { for (int n : {3, 193, 2945}) { TestReduceRowsToRow(m, n, true); @@ -197,7 +197,7 @@ TEST(ReductionFunctionsTest, ReduceRowsToRow) { } } -TEST(ReductionFunctionsTest, ReduceColumnsToColumn) { +TEST(CudaEpReductionFunctionsTest, ReduceColumnsToColumn) { for (int m : {3, 193, 2945}) { for (int n : {3, 193, 2945}) { TestReduceColumnsToColumn(m, n); @@ -205,7 +205,7 @@ TEST(ReductionFunctionsTest, ReduceColumnsToColumn) { } } -TEST(ReductionFunctionsTest, BufferOffsets) { +TEST(CudaEpReductionFunctionsTest, BufferOffsets) { const int m = 2048; const int n = 1024; const TensorShape shape{m, n}; @@ -240,7 +240,7 @@ TEST(ReductionFunctionsTest, BufferOffsets) { } } -TEST(ReductionFunctionsTest, InvalidBufferSize) { +TEST(CudaEpReductionFunctionsTest, InvalidBufferSize) { const int m = 2048; const int n = 1024; const TensorShape shape{m, n}; @@ -262,7 +262,7 @@ TEST(ReductionFunctionsTest, InvalidBufferSize) { ASSERT_FALSE(status.IsOK()); } -TEST(ReductionFunctionsTest, GetApplicableMatrixReduction) { +TEST(CudaEpReductionFunctionsTest, GetApplicableMatrixReduction) { auto test_get_applicable_matrix_reduction = [](cudnnReduceTensorOp_t cudnn_op, const std::vector& dims, const std::vector& axes, diff --git a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml index e8f391a73fa7b..7bdd069de711b 100644 --- a/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml +++ b/tools/ci_build/github/azure-pipelines/templates/jobs/win-ci-vs-2022-job.yml @@ -218,16 +218,32 @@ jobs: - powershell: | python3 -m pip uninstall -y onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml -qq Get-ChildItem -Path dist/*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname} - workingDirectory: '$(Build.BinariesDirectory)\${{ 
parameters.BuildConfig }}\${{ parameters.BuildConfig }}' displayName: 'Install onnxruntime wheel' - ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}: - - powershell: | - python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }} - - workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}' - displayName: 'Run tests' + - ${{ if and(contains(parameters.additionalBuildFlags, 'use_cuda'), contains(parameters.additionalBuildFlags, 'use_dml')) }}: + - powershell: | + python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }} + workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}' + displayName: 'Run tests excluding CUDA tests' + env: + NO_CUDA_TEST: '1' + GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*:*cpu_*models*' # Exclude CUDA EP tests under providers/cuda/ and cpu models test + PATH: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }};$(PATH)' # For onnxruntime4j_test to find dependent dlls + - powershell: | + python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }} + workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}' + displayName: 'Run tests excluding DML tests' + env: + NO_DML_TEST: '1' + 
GTEST_FILTER: '-*cpu_*models*' + PATH: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }};$(PATH)' + - ${{ else }}: + - powershell: | + python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }} + workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}' + displayName: 'Run tests' - ${{ if eq(parameters.GenerateDocumentation, true) }}: - task: PythonScript@0 diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 2816352b1f189..cc46e0c92c902 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -413,7 +413,7 @@ stages: workingDirectory: '$(Build.BinariesDirectory)' env: NO_CUDA_TEST: '1' - GTEST_FILTER: -*CudaNhwcTypedTest* + GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*' # Exclude CUDA EP tests under providers/cuda/ - task: PythonScript@0 displayName: 'test excludes DML' condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-cuda-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-cuda-ci-pipeline.yml index 47ece37e66e09..67fd47c3150af 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-cuda-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-cuda-ci-pipeline.yml @@ -62,4 +62,28 @@ stages: RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} ORT_EP_NAME: CUDA WITH_CACHE: true - MachinePool: onnxruntime-Win2022-GPU-A10 \ No newline at end of file + MachinePool: onnxruntime-Win2022-GPU-A10 + +- stage: cuda_dml + dependsOn: [] + jobs: + - template: templates/jobs/win-ci-vs-2022-job.yml + 
parameters: + BuildConfig: 'RelWithDebInfo' + EnvSetupScript: setup_env_cuda.bat + buildArch: x64 + additionalBuildFlags: >- + --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" + --enable_cuda_profiling --enable_transformers_tool_test + --use_dml + --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86 + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON + --cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_RelWithDebInfo + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + ORT_EP_NAME: CUDA + EnablePython: false + WITH_CACHE: true + MachinePool: onnxruntime-Win2022-GPU-A10 diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-dml-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-dml-ci-pipeline.yml index 94b0aa680d54d..911d99cd2adf3 100644 --- a/tools/ci_build/github/azure-pipelines/win-gpu-dml-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/win-gpu-dml-ci-pipeline.yml @@ -43,11 +43,11 @@ stages: BuildConfig: 'RelWithDebInfo' EnvSetupScript: setup_env.bat buildArch: x64 - additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml + additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml msbuildPlatform: x64 isX86: false job_name_suffix: x64_RelWithDebInfo RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} ORT_EP_NAME: DML WITH_CACHE: false - MachinePool: onnxruntime-Win2022-GPU-dml-A10 \ No newline at end of file + MachinePool: onnxruntime-Win2022-GPU-dml-A10