Skip to content

Commit

Permalink
Fix BOM issue Generate-DocIndex.ps1 (#7629)
Browse files Browse the repository at this point in the history
  • Loading branch information
danieljurek authored Feb 6, 2024
1 parent efa8a15 commit 4ccb62a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 44 deletions.
69 changes: 26 additions & 43 deletions eng/common/docgeneration/Generate-DocIndex.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,33 @@ Param (
)
. "${PSScriptRoot}\..\scripts\common.ps1"

# Given the GitHub IO blob storage URL and a language regex,
# this helper function returns a list of artifact names.
function Get-BlobStorage-Artifacts($blobStorageUrl, $blobDirectoryRegex, $blobArtifactsReplacement) {
# Fetch a list of "artifacts" from blob storage corresponding to the given
# language (-storagePrefix). Remove the prefix from the path names to arrive at
# an "artifact" name.
function Get-BlobStorage-Artifacts(
$blobDirectoryRegex,
$blobArtifactsReplacement,
$storageAccountName,
$storageContainerName,
$storagePrefix
) {
LogDebug "Reading artifact from storage blob ..."
$returnedArtifacts = @()
$pageToken = ""
Do {
$resp = ""
if (!$pageToken) {
# First page call.
$resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrl
}
else {
# Next page call
$blobStorageUrlPageToken = $blobStorageUrl + "&marker=$pageToken"
$resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrlPageToken
}
# Convert to xml documents.
$xmlDoc = [xml](removeBomFromString $resp)
foreach ($elem in $xmlDoc.EnumerationResults.Blobs.BlobPrefix) {
# The service returns names like "dotnet/Azure.AI.Anomalydetector/"; we need to extract "Azure.AI.Anomalydetector".
$artifact = $elem.Name -replace $blobDirectoryRegex, $blobArtifactsReplacement
$returnedArtifacts += $artifact
}
# Fetch page token
$pageToken = $xmlDoc.EnumerationResults.NextMarker
} while ($pageToken)
return $returnedArtifacts
}

# The BOM byte sequence differs between encodings.
# This helper function strips only the UTF-8 BOM, since UTF-8 is the encoding used by the blob storage list API.
# It returns the original string if the string does not start with the UTF-8 BOM sequence.
function RemoveBomFromString([string]$bomAwareString) {
if ($bomAwareString.length -le 3) {
return $bomAwareString
}
$bomPatternByteArray = [byte[]] (0xef, 0xbb, 0xbf)
# The default encoding for PowerShell is ISO-8859-1, so convert the leading characters to bytes using that encoding.
$bomAwareBytes = [Text.Encoding]::GetEncoding(28591).GetBytes($bomAwareString.Substring(0, 3))
if (@(Compare-Object $bomPatternByteArray $bomAwareBytes -SyncWindow 0).Length -eq 0) {
return $bomAwareString.Substring(3)
}
return $bomAwareString
# "--only-show-errors" suppresses warnings about the fact that the az CLI is not authenticated
# "--query '[].name'" returns a list of only blob names
# "--num-results *" handles pagination so the caller does not have to
$artifacts = az storage blob list `
--account-name $storageAccountName `
--container-name $storageContainerName `
--prefix $storagePrefix `
--delimiter / `
--only-show-errors `
--query '[].name' `
--num-results * | ConvertFrom-Json
LogDebug "Number of artifacts found: $($artifacts.Length)"

# example: "python/azure-storage-blob" -> "azure-storage-blob"
$artifacts = $artifacts.ForEach({ $_ -replace $blobDirectoryRegex, $blobArtifactsReplacement })
return $artifacts
}

function Get-TocMapping {
Expand Down
2 changes: 1 addition & 1 deletion eng/common/pipelines/templates/jobs/docindex.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
jobs:
- job: CreateDocIndex
pool:
vmImage: windows-2022
name: azsdk-pool-mms-win-2022-general
steps:
- task: UsePythonVersion@0
displayName: 'Use Python 3.9'
Expand Down

0 comments on commit 4ccb62a

Please sign in to comment.