From 20f45c6dc2fb3013fd1e22b4cf888b8765311bb0 Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Mon, 24 Oct 2022 22:19:15 -0700 Subject: [PATCH 1/2] Fix DML packaging pipeline --- .../c-api-noopenmp-packaging-pipelines.yml | 12 ++++++++---- .../nuget/templates/win-ci-2019.yml | 17 +++++++++-------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index eda85e34fb444..4c610b104fb17 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -665,6 +665,7 @@ jobs: - template: nuget/templates/win-ci-2019.yml parameters: AgentPool : 'onnxruntime-gpu-winbuild' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} ArtifactName: 'drop-nuget-dml' JobName: 'Windows_CI_GPU_DML_Dev' BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 16 2019" @@ -679,7 +680,7 @@ jobs: CudaVersion: '11.6' OrtPackageId: 'Microsoft.ML.OnnxRuntime.DirectML' NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} copy $(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) mkdir $(Build.ArtifactStagingDirectory)\testdata @@ -688,6 +689,7 @@ jobs: - template: 
nuget/templates/win-ci-2019.yml parameters: AgentPool : 'onnxruntime-gpu-winbuild' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} ArtifactName: 'drop-win-dml-x86-zip' JobName: 'Windows_CI_GPU_DML_Dev_x86' BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 16 2019" @@ -700,7 +702,7 @@ jobs: DoEsrp: ${{ parameters.DoEsrp }} RunTests: 'false' NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=x86 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=x86 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-x86.zip copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-x86.zip $(Build.ArtifactStagingDirectory) @@ -710,6 +712,7 @@ jobs: - template: nuget/templates/win-ci-2019.yml parameters: AgentPool : 'onnxruntime-gpu-winbuild' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} ArtifactName: 'drop-win-dml-arm64-zip' JobName: 'Windows_CI_GPU_DML_Dev_arm64' BuildCommand: --build_dir $(Build.BinariesDirectory) --arm64 --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 16 2019" @@ -722,7 +725,7 @@ jobs: DoEsrp: ${{ parameters.DoEsrp }} RunTests: 'false' NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm64 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj 
/p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm64 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-arm64.zip copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-arm64.zip $(Build.ArtifactStagingDirectory) @@ -732,6 +735,7 @@ jobs: - template: nuget/templates/win-ci-2019.yml parameters: AgentPool : 'onnxruntime-gpu-winbuild' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} ArtifactName: 'drop-win-dml-arm-zip' JobName: 'Windows_CI_GPU_DML_Dev_arm' BuildCommand: --build_dir $(Build.BinariesDirectory) --arm --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 16 2019" @@ -744,7 +748,7 @@ jobs: DoEsrp: ${{ parameters.DoEsrp }} RunTests: 'false' NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-arm.zip copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-arm.zip $(Build.ArtifactStagingDirectory) diff --git a/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml b/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml index c965c4702356d..cce2e795a8496 100644 --- a/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml +++ b/tools/ci_build/github/azure-pipelines/nuget/templates/win-ci-2019.yml @@ -22,6 +22,7 @@ 
parameters: EnableLto: true # Controls whether unreleased onnx opsets are allowed. Default is set to 1 AllowReleasedOpsetOnly: '0' + IsReleaseBuild: false jobs: - job: ${{ parameters.JobName }} timeoutInMinutes: 200 @@ -152,7 +153,7 @@ jobs: solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' configuration: '$(BuildConfig)' platform: 'Any CPU' - msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=${{ parameters.OrtPackageId }}' + msbuildArguments: '-p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=${{ parameters.OrtPackageId }} -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }}' workingDirectory: '$(Build.SourcesDirectory)\csharp' - script: | @@ -227,13 +228,13 @@ jobs: artifactName: ${{ parameters.ArtifactName }} targetPath: '$(Build.ArtifactStagingDirectory)' - - task: PublishSymbols@2 - displayName: 'Publish Build Symbols' - condition: eq(variables['IsReleaseBuild'], 'true') - inputs: - symbolsFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - searchPattern: '**/*.pdb' - symbolServerType: teamServices + - ${{ if eq(parameters['IsReleaseBuild'], 'true') }}: + - task: PublishSymbols@2 + displayName: 'Publish Build Symbols' + inputs: + symbolsFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' + searchPattern: '**/*.pdb' + symbolServerType: teamServices # Node.js Publish - ${{ if eq(parameters['DoNodejsPack'], 'true') }}: From 74383f265fe5a205d7ffc6f5f07a0f98c3e4f641 Mon Sep 17 00:00:00 2001 From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com> Date: Thu, 20 Oct 2022 12:08:57 +0800 Subject: [PATCH 2/2] [ROCm] Fix azcopy issue on ROCm ci pipeline (#13365) ### Description Use SAS Token to fix error `failed to perform copy command due to error: no SAS token or OAuth token is present and the resource is not public`. Generate a SAS Token for the target data, add it to Key Vault, and use it as a Pipeline Variable.
### Motivation and Context Co-authored-by: peixuanzuo --- orttraining/tools/ci_test/download_azure_blob_archive.py | 8 +++++++- .../orttraining-linux-gpu-amd-e2e-test-ci-pipeline.yml | 7 ++++--- .../azure-pipelines/orttraining-pai-ci-pipeline.yml | 4 +++- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/orttraining/tools/ci_test/download_azure_blob_archive.py b/orttraining/tools/ci_test/download_azure_blob_archive.py index 32743f63281df..6fa875a1d2373 100755 --- a/orttraining/tools/ci_test/download_azure_blob_archive.py +++ b/orttraining/tools/ci_test/download_azure_blob_archive.py @@ -57,7 +57,13 @@ def main(): with tempfile.TemporaryDirectory() as temp_dir, get_azcopy() as azcopy_path: archive_path = os.path.join(temp_dir, "archive.zip") print("Downloading archive from '{}'...".format(args.azure_blob_url)) - _download(azcopy_path, args.azure_blob_url, archive_path) + + azure_blob_url = args.azure_blob_url + azure_blob_sas_token = os.getenv("AZURE_BLOB_SAS_TOKEN", None) + if azure_blob_sas_token and azure_blob_sas_token != "": + azure_blob_url = azure_blob_url + "?" 
+ azure_blob_sas_token + + _download(azcopy_path, azure_blob_url, archive_path) if args.archive_sha256_digest: _check_file_sha256_digest(archive_path, args.archive_sha256_digest) print("Extracting to '{}'...".format(args.target_dir)) diff --git a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-amd-e2e-test-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-amd-e2e-test-ci-pipeline.yml index bc75073c13ca5..8207a336880ff 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-amd-e2e-test-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-linux-gpu-amd-e2e-test-ci-pipeline.yml @@ -20,11 +20,12 @@ jobs: eval "$('/home/ciagent/conda/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" echo "Selecting GPU based on HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES" displayName: 'Initialize environment' - + # update these if the E2E test data changes - script: |- + export AZURE_BLOB_SAS_TOKEN="$(onnxruntimetestdata-storage-training-container-sas-token)" python orttraining/tools/ci_test/download_azure_blob_archive.py \ - --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ + --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip \ --target_dir training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 displayName: 'Download onnxruntime_training_data.zip data' @@ -66,7 +67,7 @@ jobs: --gpu_sku MI100_32G displayName: 'Run C++ BERT-L performance test' condition: succeededOrFailed() # ensure all tests are run - + - script: |- python orttraining/tools/ci_test/run_convergence_test.py \ --binary_dir build/RelWithDebInfo \ diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml index 1e19afabb178b..3a4e13e5f3796 100644 --- 
a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml @@ -278,11 +278,13 @@ jobs: - task: CmdLine@2 inputs: script: |- + export AZURE_BLOB_SAS_TOKEN="$(onnxruntimetestdata-storage-training-container-sas-token)" python orttraining/tools/ci_test/download_azure_blob_archive.py \ - --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip?snapshot=2020-06-15T23:17:35.8314853Z \ + --azure_blob_url https://onnxruntimetestdata.blob.core.windows.net/training/onnxruntime_training_data.zip \ --target_dir training_e2e_test_data \ --archive_sha256_digest B01C169B6550D1A0A6F1B4E2F34AE2A8714B52DBB70AC04DA85D371F691BDFF9 condition: and(succeededOrFailed(), eq(variables.onnxruntimeBuildSucceeded, 'true')) # ensure all tests are run when the build successed + retryCountOnTaskFailure: 2 displayName: 'Download onnxruntime_training_data.zip data' - task: CmdLine@2