Renyix/data import (#29359)
* Resolve Azure SDK comments
xiongrenyi authored Mar 15, 2023
1 parent 63427b1 commit 46a38e7
Showing 15 changed files with 352 additions and 341 deletions.
2 changes: 2 additions & 0 deletions sdk/ml/azure-ai-ml/CHANGELOG.md
@@ -27,6 +27,8 @@
 - Added support for modifying SSH key values after creation on Compute Resources.
 - Added WorkspaceConnection types `s3`, `snowflake`, `azure_sql_db`, `azure_synapse_analytics`, `azure_my_sql_db`, `azure_postgres_db`
 - Added WorkspaceConnection auth type `access_key` for `s3`
+- Added `DataImport` class and `DataOperations.import_data`.
+- Added `DataOperations.list_materialization_status` - list the status of data import jobs that create versions of a named asset.

 ### Bugs Fixed
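Together, the two new entries describe one workflow: define a DataImport, submit it, and track the jobs that materialize the asset. A minimal sketch, assuming an authenticated MLClient named `ml_client` and an existing Snowflake workspace connection named `my_snowflake_connection` (both hypothetical):

from azure.ai.ml.data_transfer import Database
from azure.ai.ml.entities import DataImport

# Hypothetical import: pull a Snowflake query result into a datastore path
# and register it as a versioned data asset named "my_snowflake_asset".
data_import = DataImport(
    name="my_snowflake_asset",
    source=Database(
        connection="azureml:my_snowflake_connection",  # assumed to exist
        query="select * from my_sample_table",         # illustrative query
    ),
    path="azureml://datastores/workspaceblobstore/paths/snowflake/${{name}}",
)

# Submits the pipeline job that materializes the asset (see
# DataOperations.import_data further down in this diff).
job = ml_client.data.import_data(data_import=data_import)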
@@ -101,7 +101,7 @@ def make(self, data: Dict[str, str], **kwargs) -> ServicePrincipalConfiguration:
         return ServicePrincipalConfiguration(**data)


-class AccessKeyCredentialsSchema(metaclass=PatchedSchemaMeta):
+class AccessKeyConfigurationSchema(metaclass=PatchedSchemaMeta):
     type = StringTransformedEnum(
         allowed_values=ConnectionAuthType.ACCESS_KEY,
         casing_transform=camel_to_snake,
@@ -16,7 +16,7 @@
     SasTokenConfigurationSchema,
     ServicePrincipalConfigurationSchema,
     UsernamePasswordConfigurationSchema,
-    AccessKeyCredentialsSchema,
+    AccessKeyConfigurationSchema,
 )
 from azure.ai.ml._utils.utils import camel_to_snake
 from azure.ai.ml.constants._common import AzureMLResourceType
@@ -50,7 +50,7 @@ class WorkspaceConnectionSchema(PathAwareSchema):
             NestedField(UsernamePasswordConfigurationSchema),
             NestedField(ManagedIdentityConfigurationSchema),
             NestedField(ServicePrincipalConfigurationSchema),
-            NestedField(AccessKeyCredentialsSchema),
+            NestedField(AccessKeyConfigurationSchema),
         ]
     )
     metadata = fields.Dict(required=False, allow_none=True)
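The rename keeps the schema aligned with the `AccessKeyConfiguration` entity exported from `entities/__init__.py` below. A hedged sketch of the new `access_key` auth type on an `s3` connection (connection name, target, and key values are placeholders):

from azure.ai.ml.entities import AccessKeyConfiguration, WorkspaceConnection

# Hypothetical S3 connection secured with the new access_key credential type.
s3_connection = WorkspaceConnection(
    name="my_s3_connection",
    type="s3",
    target="https://my-bucket.s3.us-east-1.amazonaws.com",  # placeholder bucket URL
    credentials=AccessKeyConfiguration(
        access_key_id="<access-key-id>",          # placeholder
        secret_access_key="<secret-access-key>",  # placeholder
    ),
)
ml_client.connections.create_or_update(workspace_connection=s3_connection)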
2 changes: 2 additions & 0 deletions sdk/ml/azure-ai-ml/azure/ai/ml/entities/__init__.py
@@ -48,6 +48,7 @@
     ServicePrincipalConfiguration,
     UserIdentityConfiguration,
     UsernamePasswordConfiguration,
+    AccessKeyConfiguration,
 )
 from ._datastore.adls_gen1 import AzureDataLakeGen1Datastore
 from ._datastore.azure_storage import AzureBlobDatastore, AzureDataLakeGen2Datastore, AzureFileDatastore
@@ -319,4 +320,5 @@
     "ContainerRegistryCredential",
     "EndpointAuthKeys",
     "EndpointAuthToken",
+    "AccessKeyConfiguration",
 ]
@@ -208,6 +208,15 @@ def _from_rest_object(cls, data_rest_object: DataVersionBase) -> "Data":
         )
         return data

+    @classmethod
+    def _resolve_cls_and_type(cls, data, params_override):
+        from azure.ai.ml.entities._data_import.data_import import DataImport
+
+        if "source" in data:
+            return DataImport, None
+
+        return cls, None
+
     def _update_path(self, asset_artifact: ArtifactStorageInfo) -> None:
         regex = r"datastores\/(.+)"
         # datastore_arm_id is null for registry scenario, so capture the full_storage_path
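The new classmethod is the dispatch hook that lets the shared loader pick a class from the YAML shape: a `source` block marks an import definition, anything else stays a plain `Data`. A sketch of the effect through the public `load_data` entry point (spec file names hypothetical):

from azure.ai.ml import load_data

# No "source" key in the YAML: _resolve_cls_and_type keeps the Data class.
data_asset = load_data("data_spec.yaml")

# YAML with a "source" block: the same entry point yields a DataImport.
data_import = load_data("data_import_spec.yaml")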
@@ -67,18 +67,17 @@ def _load(
         yaml_path: Optional[Union[PathLike, str]] = None,
         params_override: Optional[list] = None,
         **kwargs,
-    ) -> Union["Data", "DataImport"]:
+    ) -> "DataImport":
         data = data or {}
         params_override = params_override or []
         context = {
             BASE_PATH_CONTEXT_KEY: Path(yaml_path).parent if yaml_path else Path("./"),
             PARAMS_OVERRIDE_KEY: params_override,
         }

-        if "source" in data:
-            return DataImport._load_from_dict(yaml_data=data, context=context, **kwargs)
+        data_import = DataImport._load_from_dict(yaml_data=data, context=context, **kwargs)

-        return Data._load_from_dict(yaml_data=data, context=context, **kwargs)
+        return data_import

     @classmethod
     def _load_from_dict(cls, yaml_data: Dict, context: Dict, **kwargs) -> "DataImport":
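With the `source` check moved up into `Data._resolve_cls_and_type`, any dictionary that reaches this `_load` is already known to be an import spec, so the branch could be dropped. A hypothetical example of such a parsed YAML payload (field names follow the entities in this PR; path and query are illustrative):

# "source" is the key that routed this dict to DataImport rather than Data.
yaml_data = {
    "name": "my_snowflake_asset",
    "type": "mltable",
    "path": "azureml://datastores/workspaceblobstore/paths/snowflake/${{name}}",
    "source": {
        "type": "database",
        "connection": "azureml:my_snowflake_connection",
        "query": "select * from my_sample_table",
    },
}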
7 changes: 3 additions & 4 deletions sdk/ml/azure-ai-ml/azure/ai/ml/entities/_load_functions.py
@@ -20,7 +20,6 @@
 from azure.ai.ml.entities._component.parallel_component import ParallelComponent
 from azure.ai.ml.entities._component.pipeline_component import PipelineComponent
 from azure.ai.ml.entities._compute.compute import Compute
-from azure.ai.ml.entities._data_import.data_import import DataImport
 from azure.ai.ml.entities._datastore.datastore import Datastore
 from azure.ai.ml.entities._deployment.batch_deployment import BatchDeployment
 from azure.ai.ml.entities._deployment.online_deployment import OnlineDeployment
@@ -430,7 +429,7 @@ def load_data(
     *,
     relative_origin: Optional[str] = None,
     **kwargs,
-) -> Union[Data, DataImport]:
+) -> Data:
     """Construct a data object from yaml file.

     :param source: The local yaml source of a data object. Must be either a
@@ -451,9 +450,9 @@
     :raises ~azure.ai.ml.exceptions.ValidationException: Raised if Data cannot be successfully validated.
         Details will be provided in the error message.
     :return: Constructed Data or DataImport object.
-    :rtype: Union[Data, DataImport]
+    :rtype: Data
     """
-    return load_common(DataImport, source, relative_origin, **kwargs)
+    return load_common(Data, source, relative_origin, **kwargs)


 def load_environment(
20 changes: 10 additions & 10 deletions sdk/ml/azure-ai-ml/azure/ai/ml/operations/_data_operations.py
@@ -11,7 +11,7 @@
 from marshmallow.exceptions import ValidationError as SchemaValidationError

 from azure.ai.ml._utils._experimental import experimental
-from azure.ai.ml.entities import Job, PipelineJob, PipelineJobSettings
+from azure.ai.ml.entities import PipelineJob, PipelineJobSettings
 from azure.ai.ml.data_transfer import import_data as import_data_func
 from azure.ai.ml.entities._inputs_outputs import Output
 from azure.ai.ml.entities._inputs_outputs.external_data import Database
@@ -359,13 +359,13 @@ def create_or_update(self, data: Data) -> Data:

     @monitor_with_activity(logger, "Data.ImportData", ActivityType.PUBLICAPI)
     @experimental
-    def import_data(self, data_import: DataImport) -> Job:
+    def import_data(self, data_import: DataImport, **kwargs) -> PipelineJob:
         """Returns the data import job that is creating the data asset.

         :param data_import: DataImport object.
         :type data_import: azure.ai.ml.entities.DataImport
         :return: data import job object.
-        :rtype: ~azure.ai.ml.entities.Job
+        :rtype: ~azure.ai.ml.entities.PipelineJob
         """

         experiment_name = "data_import_" + data_import.name
@@ -394,28 +394,28 @@ def import_data(self, data_import: DataImport) -> Job:
             jobs={experiment_name: import_job},
         )
         import_pipeline.properties["azureml.materializationAssetName"] = data_import.name
-        return self._job_operation.create_or_update(job=import_pipeline, skip_validation=True)
+        return self._job_operation.create_or_update(job=import_pipeline, skip_validation=True, **kwargs)

-    @monitor_with_activity(logger, "Data.ShowMaterializationStatus", ActivityType.PUBLICAPI)
+    @monitor_with_activity(logger, "Data.ListMaterializationStatus", ActivityType.PUBLICAPI)
     @experimental
-    def show_materialization_status(
+    def list_materialization_status(
         self,
         name: str,
         *,
         list_view_type: ListViewType = ListViewType.ACTIVE_ONLY,
-    ) -> Iterable[Job]:
+        **kwargs,
+    ) -> Iterable[PipelineJob]:
         """List materialization jobs of the asset.

         :param name: name of asset being created by the materialization jobs.
         :type name: str
         :param list_view_type: View type for including/excluding (for example) archived jobs. Default: ACTIVE_ONLY.
         :type list_view_type: Optional[ListViewType]
         :return: An iterator like instance of Job objects.
-        :rtype: ~azure.core.paging.ItemPaged[Job]
+        :rtype: ~azure.core.paging.ItemPaged[PipelineJob]
         """

         # TODO: Add back 'asset_name=name' filter once client switches to mfe 2023-02-01-preview and above
-        return self._job_operation.list(job_type="Pipeline", asset_name=name, list_view_type=list_view_type)
+        return self._job_operation.list(job_type="Pipeline", asset_name=name, list_view_type=list_view_type, **kwargs)

     @monitor_with_activity(logger, "Data.Validate", ActivityType.INTERNALCALL)
     def _validate(self, data: Data) -> Union[List[str], None]:
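A short usage sketch for the renamed method, assuming an authenticated MLClient named `ml_client` and an asset previously created through `import_data`:

# Lists the pipeline jobs that materialized versions of the named asset;
# archived jobs are excluded by the ACTIVE_ONLY default of list_view_type.
for job in ml_client.data.list_materialization_status(name="my_snowflake_asset"):
    print(job.name, job.status)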
@@ -9,7 +9,7 @@
       "Connection": "keep-alive",
       "Content-Length": "1083",
       "Content-Type": "application/json",
-      "User-Agent": "azure-ai-ml/1.5.0 az00000-python-mgmt-machinelearningservices/0.1.0 Python/3.8.13 (Windows-10-10.0.22000-SP0)"
+      "User-Agent": "azure-ai-ml/1.6.0 azsdk-python-mgmt-machinelearningservices/0.1.0 Python/3.10.6 (Windows-10-10.0.22621-SP0)"
     },
     "RequestBody": {
       "properties": {
@@ -55,22 +55,22 @@
     "StatusCode": 201,
     "ResponseHeaders": {
       "Cache-Control": "no-cache",
-      "Content-Length": "2435",
+      "Content-Length": "2461",
       "Content-Type": "application/json; charset=utf-8",
-      "Date": "Fri, 03 Mar 2023 04:59:52 GMT",
+      "Date": "Wed, 15 Mar 2023 19:20:17 GMT",
       "Expires": "-1",
       "Location": "https://management.azure.com/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000?api-version=2023-02-01-preview",
       "Pragma": "no-cache",
-      "Request-Context": "appId=cid-v1:17d65b70-e9ce-4ed5-9347-1f660ec782e9",
-      "Server-Timing": "traceparent;desc=\u002200-e3e1b1c5b1ea6070b93160a94eadaeee-411f41200f1b9428-01\u0022",
+      "request-context": "appId=cid-v1:512cc15a-13b5-415b-bfd0-dce7accb6bb1",
+      "Server-Timing": "traceparent;desc=\u002200-c82ed271972e0ae80aea387985bd0994-31af1a355d305790-01\u0022",
       "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
-      "x-aml-cluster": "vienna-eastus2euap-02",
+      "x-aml-cluster": "vienna-test-westus2-01",
       "X-Content-Type-Options": "nosniff",
-      "x-ms-correlation-request-id": "59cab1de-7e2f-434d-953c-268c681a130e",
+      "x-ms-correlation-request-id": "88fe5c87-cb15-4495-bada-9bd1021811b4",
       "x-ms-ratelimit-remaining-subscription-writes": "1199",
       "x-ms-response-type": "standard",
-      "x-ms-routing-request-id": "JAPANEAST:20230303T045952Z:59cab1de-7e2f-434d-953c-268c681a130e",
-      "x-request-time": "2.205"
+      "x-ms-routing-request-id": "WESTUS:20230315T192018Z:88fe5c87-cb15-4495-bada-9bd1021811b4",
+      "x-request-time": "4.314"
     },
     "ResponseBody": {
       "id": "/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000/jobs/000000000000000000000",
@@ -86,7 +86,7 @@
       "runSource": "MFE",
       "runType": "HTTP",
       "azureml.parameters": "{}",
-      "azureml.continue_on_step_failure": "False",
+      "azureml.continue_on_step_failure": "True",
       "azureml.continue_on_failed_optional_input": "True",
       "azureml.enforceRerun": "True",
       "azureml.defaultDataStoreName": "workspaceblobstore",
@@ -99,7 +99,7 @@
       "Tracking": {
         "jobServiceType": "Tracking",
         "port": null,
-        "endpoint": "azureml://eastus2euap.api.azureml.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000?",
+        "endpoint": "azureml://master.api.azureml-test.ms/mlflow/v1.0/subscriptions/00000000-0000-0000-0000-000000000/resourceGroups/00000/providers/Microsoft.MachineLearningServices/workspaces/00000?",
         "status": null,
         "errorMessage": null,
         "properties": null,
@@ -152,7 +152,7 @@
         "sourceJobId": null
       },
       "systemData": {
-        "createdAt": "2023-03-03T04:59:52.1317137\u002B00:00",
+        "createdAt": "2023-03-15T19:20:17.7591391\u002B00:00",
         "createdBy": "Firstname Lastname",
         "createdByType": "User"
       }
