Skip to content

Commit

Permalink
Use azcopy to access azure blob with FWI (#8160)
Browse files Browse the repository at this point in the history
* Use azcopy to access azure blob with FWI

* Make service-connection as variable
  • Loading branch information
raych1 authored Apr 26, 2024
1 parent a2d47a9 commit a8b0879
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 52 deletions.
36 changes: 20 additions & 16 deletions tools/sdk-ai-bots/.pipelines/build-document-embeddings.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ variables:
asch-endpoint: $(azure-search-endpoint)
asch-index-name: $(azure-search-index-name)
aoai-embedding-model: $(azure-openai-embedding-model)
azure-subscription-name: $(service-connection-name)

parameters:
- name: incrementalEmbedding
Expand All @@ -35,12 +36,13 @@ stages:
- template: setup-pipeline.yml
- checkout: git://internal/_git/azure-sdk-docs-eng.ms
displayName: 'Checkout azure-sdk-docs-eng.ms repository'
- task: Powershell@2
- task: AzurePowerShell@5
inputs:
filePath: $(Build.SourcesDirectory)/azure-sdk-tools/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1
arguments: >
-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"
pwsh: true
azureSubscription: $(azure-subscription-name)
ScriptType: 'FilePath'
ScriptPath: '$(Build.SourcesDirectory)/azure-sdk-tools/tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1'
arguments: '-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"'
azurePowerShellVersion: 'LatestVersion'
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run embeddings build script'
env:
Expand All @@ -60,12 +62,13 @@ stages:
- job: BuildTypeSpecDocumentEmbeddings
steps:
- template: setup-pipeline.yml
- task: Powershell@2
- task: AzurePowerShell@5
inputs:
filePath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1
arguments: >
-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"
pwsh: true
azureSubscription: $(azure-subscription-name)
ScriptType: 'FilePath'
ScriptPath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-TypeSpecAzureDocEmbeddings.ps1
arguments: '-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"'
azurePowerShellVersion: 'LatestVersion'
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run embeddings build script'
env:
Expand All @@ -77,20 +80,21 @@ stages:
AZURE_SEARCH_KEY: $(azure-search-key)
AZURE_STORAGE_ACCOUNT_KEY: $(storage-account-key)
AZURE_STORAGE_ACCOUNT_NAME: $(st-account-name)
AZURE_STORAGE_ACCOUNT_CONTAINER: $(st-container-name)
AZURE_STORAGE_ACCOUNT_CONTAINER: $(st-container-name)

- stage: BuildCustomizedDocEmbeddings
displayName: 'Build Customized Document Embeddings'
jobs:
- job: BuildCustomizedDocumentEmbeddings
steps:
- template: setup-pipeline.yml
- task: Powershell@2
- task: AzurePowerShell@5
inputs:
filePath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1
arguments: >
-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"
pwsh: true
azureSubscription: $(azure-subscription-name)
ScriptType: 'FilePath'
ScriptPath: $(Build.SourcesDirectory)/tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1
arguments: '-IncrementalEmbedding "${{ parameters.incrementalEmbedding }}"'
azurePowerShellVersion: 'LatestVersion'
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run embeddings build script'
env:
Expand Down
8 changes: 3 additions & 5 deletions tools/sdk-ai-bots/Scripts/Build-CustomizedDocEmbeddings.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,6 @@ Write-Host "scriptsRoot: $scriptsRoot"
Write-Host "embeddingToolFolder: $embeddingToolFolder"
. (Join-Path $scriptsRoot Common.ps1)

# Install Az.Storage module
if (-not (Get-Module -ListAvailable -Name Az.Storage)) {
Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser
}

# Create embeddingSource folder on current location
$embeddingSourceFolder = Join-Path -Path $workingDirectory -ChildPath "embeddingSource"
if (-not (Test-Path -Path $embeddingSourceFolder)) {
Expand Down Expand Up @@ -108,6 +103,9 @@ else {
}

# Download previous saved embeddings(last_rag_chunks_customized_docs.json) from Azure Blob Storage
# Using Azure PowerShell login type for AzCopy.
# When running this script locally, first run 'Connect-AzAccount' and then 'Set-AzContext' to switch to the correct subscription.
$env:AZCOPY_AUTO_LOGIN_TYPE="PSCRED"
$blobName = "last_rag_chunks_customized_docs.json"
$destinationPath = $embeddingSourceFolder
$ragChunkPath = Join-Path -Path $embeddingSourceFolder -ChildPath $blobName
Expand Down
8 changes: 3 additions & 5 deletions tools/sdk-ai-bots/Scripts/Build-EngHubDocEmbeddings.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,6 @@ Write-Host "scriptsRoot: $scriptsRoot"
Write-Host "embeddingToolFolder: $embeddingToolFolder"
. (Join-Path $scriptsRoot Common.ps1)

# Install Az.Storage module
if (-not (Get-Module -ListAvailable -Name Az.Storage)) {
Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser
}

# Create embeddingSource folder on current location
$embeddingSourceFolder = Join-Path -Path $workingDirectory -ChildPath "embeddingSource"
if (-not (Test-Path -Path $embeddingSourceFolder)) {
Expand Down Expand Up @@ -76,6 +71,9 @@ else {
}

# Download previous saved embeddings(last_rag_chunks_enghub_docs.json) from Azure Blob Storage
# Using Azure PowerShell login type for AzCopy.
# When running this script locally, first run 'Connect-AzAccount' and then 'Set-AzContext' to switch to the correct subscription.
$env:AZCOPY_AUTO_LOGIN_TYPE="PSCRED"
$blobName = "last_rag_chunks_enghub_docs.json"
$destinationPath = $embeddingSourceFolder
$ragChunkPath = Join-Path -Path $embeddingSourceFolder -ChildPath $blobName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,6 @@ Write-Host "scriptsRoot: $scriptsRoot"
Write-Host "embeddingToolFolder: $embeddingToolFolder"
. (Join-Path $scriptsRoot Common.ps1)

# Install Az.Storage module
if (-not (Get-Module -ListAvailable -Name Az.Storage)) {
Install-Module -Name Az.Storage -Force -AllowClobber -Scope CurrentUser
}

# Create embeddingSource folder on current location
$embeddingSourceFolder = Join-Path -Path $workingDirectory -ChildPath "embeddingSource"
if (-not (Test-Path -Path $embeddingSourceFolder)) {
Expand Down Expand Up @@ -73,6 +68,9 @@ else {
}

# Download previous saved embeddings(last_rag_chunks_typespec_docs.json) from Azure Blob Storage
# Using Azure PowerShell login type for AzCopy.
# When running this script locally, first run 'Connect-AzAccount' and then 'Set-AzContext' to switch to the correct subscription.
$env:AZCOPY_AUTO_LOGIN_TYPE="PSCRED"
$blobName = "last_rag_chunks_typespec_docs.json"
$destinationPath = $embeddingSourceFolder
$ragChunkPath = Join-Path -Path $embeddingSourceFolder -ChildPath $blobName
Expand Down
91 changes: 71 additions & 20 deletions tools/sdk-ai-bots/Scripts/Common.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,39 @@ function Clone-Repository {
return $true
}

# Checks whether the 'azcopy' command can be resolved in the current session.
# Returns $true when Get-Command finds it, otherwise $false.
#
# A missing AzCopy is an expected condition here: the blob helpers in this file
# call Download-AzCopy when this function returns $false. It is therefore
# reported with Write-Warning instead of Write-Error, because with
# $ErrorActionPreference = 'Stop' a Write-Error is raised as a terminating
# error and the download fallback would never get a chance to run.
function Test-AzCopyInstalled {
    # SilentlyContinue makes Get-Command yield nothing (instead of raising) when the command is absent.
    $azcopyCommand = Get-Command azcopy -ErrorAction SilentlyContinue
    if ($azcopyCommand) {
        return $true
    }

    Write-Warning "AzCopy is not installed."
    return $false
}

# Downloads the AzCopy zip archive and extracts it under $DestinationPath.
#
# Parameters:
#   DestinationPath - folder that receives 'azcopy.zip' and the extracted files;
#                     it is created when it does not exist yet.
#   AzCopyUrl       - URL of the archive to download. Optional; defaults to the
#                     pinned Windows amd64 10.14.1 build.
#
# Returns $true after a successful download and extraction. On failure the
# exception is reported through Write-Error, and $false is returned if
# execution continues past that call.
function Download-AzCopy {
    param (
        [Parameter(Position = 0)]
        [ValidateNotNullOrEmpty()]
        [string] $DestinationPath,

        # NOTE(review): the embedding scripts set AZCOPY_AUTO_LOGIN_TYPE="PSCRED";
        # confirm that this pinned build supports that login type, otherwise pass a newer archive URL.
        [Parameter(Position = 1)]
        [ValidateNotNullOrEmpty()]
        [string] $AzCopyUrl = "https://azcopyvnext.azureedge.net/release20220315/azcopy_windows_amd64_10.14.1.zip"
    )

    try {
        # Make sure the target folder exists; Out-Null keeps the New-Item result out of the return value.
        if (-not (Test-Path -Path $DestinationPath)) {
            New-Item -ItemType Directory -Path $DestinationPath -Force -ErrorAction Stop | Out-Null
        }

        $azCopyZip = Join-Path $DestinationPath "azcopy.zip"

        # -ErrorAction Stop turns non-terminating errors into terminating ones so the catch
        # block below sees them and the function does not report success after a failure.
        Invoke-WebRequest -Uri $AzCopyUrl -OutFile $azCopyZip -ErrorAction Stop
        Expand-Archive -Path $azCopyZip -DestinationPath $DestinationPath -Force -ErrorAction Stop
        return $true
    }
    catch {
        Write-Error "Failed to download AzCopy with exception:`n$_"
    }
    return $false
}

function Download-AzureBlob {
param (
[Parameter(Position = 0)]
Expand All @@ -49,20 +82,32 @@ function Download-AzureBlob {
[ValidateNotNullOrEmpty()]
[string] $DestinationPath
)

$storageAccountKey = $env:AZURE_STORAGE_ACCOUNT_KEY
if (-not $storageAccountKey) {
Write-Error "Please set the environment variable 'AZURE_STORAGE_ACCOUNT_KEY'."
return $false
}
try {
$context = New-AzStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $storageAccountKey

$blob = Get-AzStorageBlob -Context $context -Container $ContainerName -Blob $BlobName

$blobPath = "https://$StorageAccountName.blob.core.windows.net/$ContainerName/$BlobName"
$destinationFile = Join-Path -Path $DestinationPath -ChildPath $BlobName

$blob | Get-AzStorageBlobContent -Destination $destinationFile -Force
$azcopyCmd = "azcopy copy $blobPath $destinationFile --recursive"
if (-not (Test-AzCopyInstalled)) {
if(Download-AzCopy (Get-Location).Path) {
$azFilePath = (Get-ChildItem -Recurse |Where-object {$_.Name -eq 'azcopy.exe'} | Select-Object -First 1).FullName
$azcopyCmd = "$azFilePath copy $blobPath $destinationFile --recursive"
}
else
{
return $false
}
}
# If the following command hangs for a long time, a manual login may be required.
# In that case, run the printed azcopy command manually.
Write-Host "azcopyCmd: $azcopyCmd"
$azcopyOutput = Invoke-Expression $azcopyCmd
Write-Host "azcopyOutput: $azcopyOutput"
if(Test-Path $destinationFile) {
Write-Host "$destinationFile downloaded successfully."
}
else {
Write-Error "$destinationFile failed to download."
return $false
}
return $true
}
catch {
Expand Down Expand Up @@ -142,15 +187,21 @@ function Upload-AzureBlob {
[string] $SourceFile
)

$storageAccountKey = $env:AZURE_STORAGE_ACCOUNT_KEY
if (-not $storageAccountKey) {
Write-Error "Please set the environment variable 'AZURE_STORAGE_ACCOUNT_KEY'."
return $false
}
try {
$context = New-AzStorageContext -StorageAccountName $StorageAccountName -StorageAccountKey $storageAccountKey

$blob = Set-AzStorageBlobContent -Context $context -Container $ContainerName -Blob $BlobName -File $SourceFile -Force
$blobPath = "https://$StorageAccountName.blob.core.windows.net/$ContainerName/$BlobName"
$azcopyCmd = "azcopy copy $SourceFile $blobPath"
if (-not (Test-AzCopyInstalled)) {
if(Download-AzCopy (Get-Location).Path) {
$azFilePath = (Get-ChildItem -Recurse |Where-object {$_.Name -eq 'azcopy.exe'} | Select-Object -First 1).FullName
$azcopyCmd = "$azFilePath copy $SourceFile $blobPath"
}
else {
return $false
}
}
Write-Host "azcopyCmd: $azcopyCmd"
$azcopyOutput = Invoke-Expression $azcopyCmd
Write-Host "azcopyOutput: $azcopyOutput"
return $true
}
catch {
Expand Down
3 changes: 2 additions & 1 deletion tools/sdk-ai-bots/Scripts/Markdown-BuildIndexMetadata.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ function Copy-Files([string]$rootFolder, [string]$outputFolder) {
New-Item -ItemType Directory -Path $outputFolder
}
$newFileName = Join-Path $outputFolder $key
Copy-Item -Path $_ -Destination $newFileName
Write-Debug "Copying file $_ to $newFileName"
Copy-Item -Path $_.FullName -Destination $newFileName
}
}
}
Expand Down

0 comments on commit a8b0879

Please sign in to comment.