Skip to content

Commit

Permalink
Add v0.1.0b5 for azure-ai-ml (Azure#25042)
Browse files Browse the repository at this point in the history
* 2.6.0 release for azure-ai-ml

* Updated changelog for azure-ai-ml

* Applied hotfixes to pass pipelines

* Revised version number

* Typo fix

* Added missing dependency for ml

* Replaced failing test files for ml

* Updated release cut to fix pipeline bugs for ml

* Added missing test configuration files

* Fixed anonymous asset test to use proper version

* Added py.typed file

* Moved py.typed file to namespace root

* Typo fix from previous commit

* Moved py.typed file to furthest non-code depth of ml package

* Removed extra comments

* Fixing version number

* Fixes from code review

* Updated 0.1.0b5 with hotfixes

* Updated release date on changelog
  • Loading branch information
nthandeMS authored and jeremydvoss committed Jul 21, 2022
1 parent 1ae25e1 commit c67468a
Show file tree
Hide file tree
Showing 101 changed files with 2,606 additions and 2,147 deletions.
31 changes: 24 additions & 7 deletions sdk/ml/azure-ai-ml/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,15 +1,32 @@
## Release History
# Release History

### 0.1.0b4 (unreleased)
## 0.1.0b5 (2022-07-15)

#### Features Added
### Features Added

#### Breaking Changes
- Allow Input/Output objects to be used by CommandComponent.
- Added MoonCake cloud support.
- Unified inputs/outputs building and validation logic in BaseNode.
- Allow Git repo URLs to be used as code for jobs and components.
- Updated AutoML YAML schema to use InputSchema.
- Added end_time to job schedule.
- MIR and pipeline job now support registry assets.

#### Bugs Fixed
### Breaking Changes

#### Other Changes
### Bugs Fixed

- Have mldesigner use argparser to parse incoming args.
- Bumped pyjwt version to <3.0.0.
- Reverted "upload support for symlinks".
- Error message improvement when a YAML UnionField fails to match.
- Reintroduced support for symlinks when uploading.
- Hard coded registry base URL to eastus region to support preview.

## 0.1.0b4 (2022-06-16)

## 0.1.0b3 (2022-05-24)

### Features Added

### 0.1.0b3 (2022-05-24)
- First preview.
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
# ---------------------------------------------------------

from typing import Dict, Any, Optional
from azure.ai.ml._azure_environments import ENDPOINT_URLS, _get_cloud_details, resource_to_scopes
from azure.ai.ml._azure_environments import (
_get_cloud_details,
_get_base_url_from_metadata,
_resource_to_scopes,
_get_azure_portal_id_from_metadata,
)
from azure.core.polling import LROPoller
from azure.ai.ml._arm_deployments.arm_helper import deployment_message_mapping
from azure.ai.ml._utils._arm_id_utils import get_arm_id_object_from_id
Expand Down Expand Up @@ -39,8 +44,8 @@ def __init__(
self._resource_group_name = resource_group_name
self._deployment_name = deployment_name
self._cloud = _get_cloud_details()
management_hostname = self._cloud.get(ENDPOINT_URLS.RESOURCE_MANAGER_ENDPOINT).strip("/")
credential_scopes = resource_to_scopes(management_hostname)
management_hostname = _get_base_url_from_metadata()
credential_scopes = _resource_to_scopes(management_hostname)
kwargs.pop("base_url", None)
if credential_scopes is not None:
kwargs["credential_scopes"] = credential_scopes
Expand Down Expand Up @@ -82,7 +87,7 @@ def deploy_resource(
)
module_logger.info(
ENDPOINT_DEPLOYMENT_START_MSG.format(
self._cloud.get(ENDPOINT_URLS.AZURE_PORTAL_ENDPOINT),
_get_azure_portal_id_from_metadata(),
self._subscription_id,
self._resource_group_name,
self._deployment_name,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path
from datetime import datetime, timedelta
import uuid
from azure.ai.ml._azure_environments import ENDPOINT_URLS, _get_cloud_details
from azure.ai.ml._azure_environments import _get_storage_endpoint_from_metadata

from azure.storage.blob import generate_blob_sas, BlobSasPermissions
from azure.storage.filedatalake import generate_file_sas, FileSasPermissions
Expand Down Expand Up @@ -68,8 +68,7 @@ def get_datastore_info(operations: DatastoreOperations, name: str) -> Dict[str,
else:
datastore = operations.get_default(include_secrets=True)

cloud_details = _get_cloud_details()
storage_endpoint = cloud_details.get(ENDPOINT_URLS.STORAGE_ENDPOINT)
storage_endpoint = _get_storage_endpoint_from_metadata()
credentials = datastore.credentials
datastore_info["storage_type"] = datastore.type
datastore_info["storage_account"] = datastore.account_name
Expand Down
118 changes: 12 additions & 106 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_blob_storage_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,26 @@
import logging
import time
import os
import warnings
from contextlib import suppress
from typing import Optional, Dict, Any, List, TYPE_CHECKING
from typing import Dict, List, TYPE_CHECKING
from pathlib import PurePosixPath, Path
from multiprocessing import cpu_count
from colorama import Fore
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm, TqdmWarning
from platform import system
import sys

from azure.ai.ml._utils._exception_utils import EmptyDirectoryError
from azure.storage.blob import BlobServiceClient, ContainerClient
from azure.core.exceptions import ResourceNotFoundError
from azure.ai.ml._utils._asset_utils import (
generate_asset_id,
traverse_directory,
upload_directory,
upload_file,
AssetNotChangedError,
_build_metadata_dict,
IgnoreFile,
FileUploadProgressBar,
get_directory_size,
)
from azure.ai.ml._artifacts._constants import (
UPLOAD_CONFIRMATION,
ARTIFACT_ORIGIN,
LEGACY_ARTIFACT_DIRECTORY,
EMPTY_DIRECTORY_ERROR,
PROCESSES_PER_CORE,
MAX_CONCURRENCY,
FILE_SIZE_WARNING,
BLOB_DATASTORE_IS_HDI_FOLDER_KEY,
Expand Down Expand Up @@ -103,11 +94,18 @@ def upload(

# start upload
if os.path.isdir(source):
self.upload_dir(source, asset_id, msg, show_progress, ignore_file=ignore_file)
upload_directory(
storage_client=self,
source=source,
dest=asset_id,
msg=msg,
show_progress=show_progress,
ignore_file=ignore_file,
)
else:
self.indicator_file = dest
self.check_blob_exists()
self.upload_file(source, dest, msg, show_progress)
upload_file(storage_client=self, source=source, dest=dest, msg=msg, show_progress=show_progress)
print(Fore.RESET + "\n", file=sys.stderr)

# upload must be completed before we try to generate confirmation file
Expand All @@ -124,98 +122,6 @@ def upload(

return artifact_info

def upload_file(
self,
source: str,
dest: str,
msg: Optional[str] = None,
show_progress: Optional[bool] = None,
in_directory: bool = False,
callback: Any = None,
) -> None:
"""
Upload a single file to a path inside the container
"""
validate_content = os.stat(source).st_size > 0 # don't do checksum for empty files

with open(source, "rb") as data:
if show_progress and not in_directory:
file_size, _ = get_directory_size(source)
file_size_in_mb = file_size / 10**6
if file_size_in_mb < 1:
msg += Fore.GREEN + " (< 1 MB)"
else:
msg += Fore.GREEN + f" ({round(file_size_in_mb, 2)} MBs)"
cntx_manager = FileUploadProgressBar(msg=msg)
else:
cntx_manager = suppress()

with cntx_manager as c:
callback = c.update_to if (show_progress and not in_directory) else None
self.container_client.upload_blob(
name=dest,
data=data,
validate_content=validate_content,
overwrite=self.overwrite,
raw_response_hook=callback,
max_concurrency=MAX_CONCURRENCY,
)

self.uploaded_file_count += 1

def upload_dir(self, source: str, dest: str, msg: str, show_progress: bool, ignore_file: IgnoreFile) -> None:
"""
Upload a directory to a path inside the container
Azure Blob doesn't allow metadata setting at the directory level, so the first
file in the directory is designated as the file where the confirmation metadata
will be added at the end of the upload.
"""
source_path = Path(source).resolve()
prefix = "" if dest == "" else dest + "/"
prefix += os.path.basename(source_path) + "/"

# get all paths in directory and each file's size
upload_paths = []
size_dict = {}
total_size = 0
for root, _, files in os.walk(source_path):
upload_paths += list(traverse_directory(root, files, source_path, prefix, ignore_file=ignore_file))

for path, _ in upload_paths:
path_size = os.path.getsize(path)
size_dict[path] = path_size
total_size += path_size

upload_paths = sorted(upload_paths)
if len(upload_paths) == 0:
raise EmptyDirectoryError(
message=EMPTY_DIRECTORY_ERROR.format(source),
no_personal_data_message=msg.format("[source]"),
target=ErrorTarget.ARTIFACT,
error_category=ErrorCategory.USER_ERROR,
)

self.indicator_file = upload_paths[0][1]
self.check_blob_exists()
self.total_file_count = len(upload_paths)

# submit paths to workers for upload
num_cores = int(cpu_count()) * PROCESSES_PER_CORE
with ThreadPoolExecutor(max_workers=num_cores) as ex:
futures_dict = {
ex.submit(self.upload_file, src, dest, in_directory=True, show_progress=show_progress): (src, dest)
for (src, dest) in upload_paths
}
if show_progress:
warnings.simplefilter("ignore", category=TqdmWarning)
msg += f" ({round(total_size/10**6, 2)} MBs)"
ascii = system() == "Windows" # Default unicode progress bar doesn't display well on Windows
with tqdm(total=total_size, desc=msg, ascii=ascii) as pbar:
for future in as_completed(futures_dict):
file_path_name = futures_dict[future][0]
pbar.update(size_dict.get(file_path_name) or 0)

def check_blob_exists(self) -> None:
"""
Throw error if blob already exists.
Expand Down
2 changes: 2 additions & 0 deletions sdk/ml/azure-ai-ml/azure/ai/ml/_artifacts/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@
"{jsonSchemaErrorPath}{jsonSchemaMessage}\n{invalidMLTableMsg}:\n{invalidSchemaSnippet}"
)
BLOB_DATASTORE_IS_HDI_FOLDER_KEY = "hdi_isfolder"
BLOB_STORAGE_CLIENT_NAME = "BlobStorageClient"
GEN2_STORAGE_CLIENT_NAME = "Gen2StorageClient"
Loading

0 comments on commit c67468a

Please sign in to comment.