From ade79b84e638a37c7fb48fb9f9c19aa7fc7b5763 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Wed, 4 Dec 2024 21:20:12 -0500 Subject: [PATCH] Adding DML to python cuda package (#22606) --- .../test/python/onnx_backend_test_series.py | 41 ++++++++++++------- .../onnx_backend_test_series_filters.jsonc | 7 ++++ .../jobs/steps/py_packaging_test_step.yml | 21 ++++++++++ .../stages/py-gpu-packaging-stage.yml | 2 +- .../stages/py-win-gpu-stage.yml | 27 ++++++------ 5 files changed, 70 insertions(+), 28 deletions(-) create mode 100644 tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index 8fc76da3495a8..a274b90dc042f 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -105,7 +105,7 @@ def load_jsonc(basename: str): return json.loads("\n".join(lines)) -def create_backend_test(test_name=None): +def create_backend_test(devices: list[str], test_name=None): """Creates an OrtBackendTest and adds its TestCase's to global scope so unittest will find them.""" overrides = load_jsonc("onnx_backend_test_series_overrides.jsonc") @@ -126,30 +126,29 @@ def create_backend_test(test_name=None): else: filters = load_jsonc("onnx_backend_test_series_filters.jsonc") current_failing_tests = apply_filters(filters, "current_failing_tests") - if platform.architecture()[0] == "32bit": current_failing_tests += apply_filters(filters, "current_failing_tests_x86") - if backend.supports_device("DNNL"): + if backend.supports_device("DNNL") or "DNNL" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_DNNL") - if backend.supports_device("NNAPI"): + if backend.supports_device("NNAPI") or "NNAPI" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_NNAPI") - if backend.supports_device("OPENVINO_GPU"): + if 
backend.supports_device("OPENVINO_GPU") or "OPENVINO_GPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_GPU") - if backend.supports_device("OPENVINO_CPU"): + if backend.supports_device("OPENVINO_CPU") or "OPENVINO_CPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP32") current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16") - if backend.supports_device("OPENVINO_NPU"): + if backend.supports_device("OPENVINO_NPU") or "OPENVINO_NPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU") - if backend.supports_device("OPENVINO"): + if backend.supports_device("OPENVINO") or "OPENVINO" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18") - if backend.supports_device("MIGRAPHX"): + if backend.supports_device("MIGRAPHX") or "MIGRAPHX" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_MIGRAPHX") if backend.supports_device("WEBGPU"): @@ -158,8 +157,16 @@ def create_backend_test(test_name=None): # Skip these tests for a "pure" DML onnxruntime python wheel. We keep these tests enabled for instances where both DML and CUDA # EPs are available (Windows GPU CI pipeline has this config) - these test will pass because CUDA has higher precedence than DML # and the nodes are assigned to only the CUDA EP (which supports these tests) - if backend.supports_device("DML") and not backend.supports_device("GPU"): + if (backend.supports_device("DML") and not backend.supports_device("GPU")) or "DML" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_pure_DML") + # exclude CUDA EP when DML test is running. 
+ os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,CUDAExecutionProvider" + elif backend.supports_device("DML") and "DML" not in devices: + # exclude DML EP when CUDA test is running. + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,DmlExecutionProvider" + else: + # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" filters = ( current_failing_tests @@ -172,9 +179,6 @@ def create_backend_test(test_name=None): backend_test.exclude("(" + "|".join(filters) + ")") print("excluded tests:", filters) - # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior - os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" - # import all test cases at global scope to make # them visible to python.unittest. globals().update(backend_test.enable_report().test_cases) @@ -199,6 +203,15 @@ def parse_args(): help="Only run tests that match this value. Matching is regex based, and '.*' is automatically appended", ) + parser.add_argument( + "--devices", + type=str, + choices=["CPU", "CUDA", "MIGRAPHX", "DNNL", "DML", "OPENVINO_GPU", "OPENVINO_CPU", "OPENVINO_NPU", "OPENVINO"], + nargs="+", # allows multiple values + default=["CPU"], # default to ["CPU"] if no input is given + help="Select one or more devices CPU, CUDA, MIGRAPHX, DNNL, DML, OPENVINO_GPU, OPENVINO_CPU, OPENVINO_NPU, OPENVINO", + ) + # parse just our args. 
python unittest has its own args and arg parsing, and that runs inside unittest.main() parsed, unknown = parser.parse_known_args() sys.argv = sys.argv[:1] + unknown @@ -209,5 +222,5 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - create_backend_test(args.test_name) + create_backend_test(args.devices, args.test_name) unittest.main() diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index f083ab14ad133..7ecaab6fedb02 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -750,6 +750,13 @@ "^test_reduce_log_sum_empty_set_cpu", "^test_reduce_log_sum_exp_empty_set_cpu", "^test_reduce_prod_empty_set_cpu", + // Bug: DML EP somehow executes these CUDA tests and they fail + // TODO: Remove these tests when DML EP is fixed + "^test_convtranspose_autopad_same_cuda", + "^test_asin_example_cuda", + "^test_dynamicquantizelinear_cuda", + "^test_dynamicquantizelinear_expanded_cuda", + "^test_reduce_min_empty_set_cuda", //Bug: DML EP does not execute operators with an empty input tensor //TODO: Resolve as a graph implementation that returns a constant inf tensor with appropriate strides "^test_reduce_min_empty_set_cpu" diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml new file mode 100644 index 0000000000000..9a721c65de332 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml @@ -0,0 +1,21 @@ +parameters: +- name: EP_NAME + type: string + default: CPU + +- name: PYTHON_VERSION + type: string + +steps: +- powershell: | + python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq + Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl |
foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} + mkdir -p $(Agent.TempDirectory)\ort_test_data + Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data + Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data + cd $(Agent.TempDirectory)\ort_test_data + python onnx_backend_test_series.py --devices ${{ parameters.EP_NAME }} -v + cd $(Agent.TempDirectory) + Remove-Item -Path $(Agent.TempDirectory)\ort_test_data -Recurse -Force + workingDirectory: '$(Build.sourcesDirectory)' + displayName: 'Run Python Tests with ${{ parameters.EP_NAME }} EP' \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 947e4f99b984f..f7235e3ad2076 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -56,7 +56,7 @@ stages: PYTHON_VERSION: ${{ python_version }} EP_NAME: gpu CudaVersion: ${{ parameters.cuda_version }} - EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_dml --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" use_tensorrt: True - ${{ if eq(parameters.enable_linux_cuda, true) }}: diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index aa7f2845fc0fa..dd0539f751c89 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -33,7 
+33,7 @@ parameters: - Release - RelWithDebInfo - MinSizeRel - + - name: use_tensorrt type: boolean default: false @@ -134,7 +134,7 @@ stages: --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests - --parallel --use_binskim_compliant_compile_flags --update --build + --parallel 4 --use_binskim_compliant_compile_flags --update --build $(TelemetryOption) ${{ parameters.BUILD_PY_PARAMETERS }} ${{ parameters.EP_BUILD_FLAGS }} ${{ variables.trt_build_flag }} workingDirectory: '$(Build.BinariesDirectory)' @@ -206,19 +206,20 @@ stages: DownloadTRT: ${{ parameters.use_tensorrt }} - task: PowerShell@2 - displayName: 'Install ONNX' + displayName: 'Install Third Party Dependencies' inputs: filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' workingDirectory: '$(Build.BinariesDirectory)' arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\${{ parameters.cmake_build_type }}\installed -build_config ${{ parameters.cmake_build_type }} - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - mkdir -p $(Agent.TempDirectory)\ort_test_data - Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data - Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data - cd $(Agent.TempDirectory)\ort_test_data - python onnx_backend_test_series.py - workingDirectory: '$(Build.sourcesDirectory)' - displayName: 'Run Python Tests' + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: DML + PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: CUDA + 
PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + +