From 1c4ac242d6861a86c4962e9100717f2b5cda241b Mon Sep 17 00:00:00 2001
From: Michael Hu
Date: Wed, 4 May 2022 16:56:04 -0400
Subject: [PATCH 01/14] Create abstract forecasting training job class.

Moves AutoMLForecasting training logic into a base class so we can reuse it
for other forecasting models. Adds tests for a future seq2seq training job.
---
 google/cloud/aiplatform/schema.py                  |    1 +
 google/cloud/aiplatform/training_jobs.py           | 3326 +++++++++--------
 .../test_automl_forecasting_training_jobs.py       |  145 +-
 3 files changed, 1985 insertions(+), 1487 deletions(-)

diff --git a/google/cloud/aiplatform/schema.py b/google/cloud/aiplatform/schema.py
index a1da75d9e6..8c8e7f32f3 100644
--- a/google/cloud/aiplatform/schema.py
+++ b/google/cloud/aiplatform/schema.py
@@ -23,6 +23,7 @@ class definition:
         custom_task = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml"
         automl_tabular = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_tabular_1.0.0.yaml"
         automl_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_time_series_forecasting_1.0.0.yaml"
+        seq2seq_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/seq2seq_plus_time_series_forecasting_1.0.0.yaml"
         automl_image_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_classification_1.0.0.yaml"
         automl_image_object_detection = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_object_detection_1.0.0.yaml"
         automl_text_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_classification_1.0.0.yaml"
diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py
index a6244e08ca..3f308b510a 100644
--- a/google/cloud/aiplatform/training_jobs.py
+++ b/google/cloud/aiplatform/training_jobs.py
@@ -1561,176 +1561,68 @@ def _model_upload_fail_string(self) -> str:
         )
 
 
-# TODO(b/172368325) add scheduling, custom_job.Scheduling
-class CustomTrainingJob(_CustomTrainingJob):
-    """Class to launch a Custom Training Job in Vertex AI using a script.
-
-    Takes a training implementation as a python script and executes that
-    script in Cloud Vertex AI Training.
- """ +class _ForecastingTrainingJob(_TrainingJob): + """ABC for Forecasting Training Pipelines.""" def __init__( self, - # TODO(b/223262536): Make display_name parameter fully optional in next major release - display_name: str, - script_path: str, - container_uri: str, - requirements: Optional[Sequence[str]] = None, - model_serving_container_image_uri: Optional[str] = None, - model_serving_container_predict_route: Optional[str] = None, - model_serving_container_health_route: Optional[str] = None, - model_serving_container_command: Optional[Sequence[str]] = None, - model_serving_container_args: Optional[Sequence[str]] = None, - model_serving_container_environment_variables: Optional[Dict[str, str]] = None, - model_serving_container_ports: Optional[Sequence[int]] = None, - model_description: Optional[str] = None, - model_instance_schema_uri: Optional[str] = None, - model_parameters_schema_uri: Optional[str] = None, - model_prediction_schema_uri: Optional[str] = None, + display_name: Optional[str] = None, + optimization_objective: Optional[str] = None, + column_specs: Optional[Dict[str, str]] = None, + column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, - staging_bucket: Optional[str] = None, ): - """Constructs a Custom Training Job from a Python script. - - job = aiplatform.CustomTrainingJob( - display_name='test-train', - script_path='test_script.py', - requirements=['pandas', 'numpy'], - container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', - model_serving_container_image_uri='gcr.io/my-trainer/serving:1', - model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata, - labels={'key': 'value'}, - ) - - Usage with Dataset: - - ds = aiplatform.TabularDataset( - 'projects/my-project/locations/us-central1/datasets/12345') - - job.run( - ds, - replica_count=1, - model_display_name='my-trained-model', - model_labels={'key': 'value'}, - ) - - Usage without Dataset: - - job.run(replica_count=1, model_display_name='my-trained-model) - - - TODO(b/169782082) add documentation about traning utilities - To ensure your model gets saved in Vertex AI, write your saved model to - os.environ["AIP_MODEL_DIR"] in your provided training script. - + """Constructs a Forecasting Training Job. Args: display_name (str): - Required. The user-defined name of this TrainingPipeline. - script_path (str): Required. Local path to training script. - container_uri (str): - Required: Uri of the training container image in the GCR. - requirements (Sequence[str]): - List of python packages dependencies of script. - model_serving_container_image_uri (str): - If the training produces a managed Vertex AI Model, the URI of the - Model serving container suitable for serving the model produced by the - training script. - model_serving_container_predict_route (str): - If the training produces a managed Vertex AI Model, An HTTP path to - send prediction requests to the container, and which must be supported - by it. If not specified a default HTTP path will be used by Vertex AI. - model_serving_container_health_route (str): - If the training produces a managed Vertex AI Model, an HTTP path to - send health check requests to the container, and which must be supported - by it. 
If not specified a standard HTTP path will be used by AI - Platform. - model_serving_container_command (Sequence[str]): - The command with which the container is run. Not executed within a - shell. The Docker image's ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container's - environment. If a variable cannot be resolved, the reference in the - input string will be unchanged. The $(VAR_NAME) syntax can be escaped - with a double $$, ie: $$(VAR_NAME). Escaped references will never be - expanded, regardless of whether the variable exists or not. - model_serving_container_args (Sequence[str]): - The arguments to the command. The Docker image's CMD is used if this is - not provided. Variable references $(VAR_NAME) are expanded using the - container's environment. If a variable cannot be resolved, the reference - in the input string will be unchanged. The $(VAR_NAME) syntax can be - escaped with a double $$, ie: $$(VAR_NAME). Escaped references will - never be expanded, regardless of whether the variable exists or not. - model_serving_container_environment_variables (Dict[str, str]): - The environment variables that are to be present in the container. - Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - model_serving_container_ports (Sequence[int]): - Declaration of ports that are exposed by the container. This field is - primarily informational, it gives Vertex AI information about the - network connections the container uses. Listing or not a port here has - no impact on whether the port is actually exposed, any port listening on - the default "0.0.0.0" address inside a container will be accessible from - the network. - model_description (str): - The description of the Model. - model_instance_schema_uri (str): - Optional. Points to a YAML file stored on Google Cloud - Storage describing the format of a single instance, which - are used in - ``PredictRequest.instances``, - ``ExplainRequest.instances`` - and - ``BatchPredictionJob.input_config``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. - AutoML Models always have this field populated by AI - Platform. Note: The URI given on output will be immutable - and probably different, including the URI scheme, than the - one given on input. The output URI will point to a location - where the user only has a read access. - model_parameters_schema_uri (str): - Optional. Points to a YAML file stored on Google Cloud - Storage describing the parameters of prediction and - explanation via - ``PredictRequest.parameters``, - ``ExplainRequest.parameters`` - and - ``BatchPredictionJob.model_parameters``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. - AutoML Models always have this field populated by AI - Platform, if no parameters are supported it is set to an - empty string. Note: The URI given on output will be - immutable and probably different, including the URI scheme, - than the one given on input. The output URI will point to a - location where the user only has a read access. - model_prediction_schema_uri (str): - Optional. Points to a YAML file stored on Google Cloud - Storage describing the format of a single prediction - produced by this Model, which are returned via - ``PredictResponse.predictions``, - ``ExplainResponse.explanations``, - and - ``BatchPredictionJob.output_config``. - The schema is defined as an OpenAPI 3.0.2 `Schema - Object `__. 
- AutoML Models always have this field populated by AI - Platform. Note: The URI given on output will be immutable - and probably different, including the URI scheme, than the - one given on input. The output URI will point to a location - where the user only has a read access. + Optional. The user-defined name of this TrainingPipeline. + optimization_objective (str): + Optional. Objective function the model is to be optimized towards. + The training process creates a Model that optimizes the value of the objective + function over the validation set. The supported optimization objectives: + "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). + "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). + "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) + and mean-absolute-error (MAE). + "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. + (Set this objective to build quantile forecasts.) + column_specs (Dict[str, str]): + Optional. Alternative to column_transformations where the keys of the dict + are column names and their respective values are one of + AutoMLTabularTrainingJob.column_data_types. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + column_transformations (List[Dict[str, Dict[str, str]]]): + Optional. Transformations to apply to the input columns (i.e. columns other + than the targetColumn). Each transformation may produce multiple + result values from the column's value, and all are used for training. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + Consider using column_specs as column_transformations will be deprecated eventually. project (str): - Project to run training in. Overrides project set in aiplatform.init. + Optional. Project to run training in. Overrides project set in aiplatform.init. location (str): - Location to run training in. Overrides location set in aiplatform.init. + Optional. Location to run training in. Overrides location set in aiplatform.init. credentials (auth_credentials.Credentials): - Custom credentials to use to run call training service. Overrides + Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. labels (Dict[str, str]): Optional. The labels with user-defined metadata to @@ -1749,12 +1641,9 @@ def __init__( ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. - If set, this TrainingPipeline will be secured by this key. 
- Note: Model trained by this TrainingPipeline is also secured by this key if ``model_to_upload`` is not set separately. - Overrides encryption_spec_key_name set in aiplatform.init. model_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer @@ -1763,16 +1652,11 @@ def __init__( ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. The key needs to be in the same region as where the compute resource is created. - If set, the trained Model will be secured by this key. - Overrides encryption_spec_key_name set in aiplatform.init. - staging_bucket (str): - Bucket used to stage source and training artifacts. Overrides - staging_bucket set in aiplatform.init. + Raises: + ValueError: If both column_transformations and column_specs were provided. """ - if not display_name: - display_name = self.__class__._generate_display_name() super().__init__( display_name=display_name, project=project, @@ -1781,74 +1665,76 @@ def __init__( labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, - container_uri=container_uri, - model_instance_schema_uri=model_instance_schema_uri, - model_parameters_schema_uri=model_parameters_schema_uri, - model_prediction_schema_uri=model_prediction_schema_uri, - model_serving_container_environment_variables=model_serving_container_environment_variables, - model_serving_container_ports=model_serving_container_ports, - model_serving_container_image_uri=model_serving_container_image_uri, - model_serving_container_command=model_serving_container_command, - model_serving_container_args=model_serving_container_args, - model_serving_container_predict_route=model_serving_container_predict_route, - model_serving_container_health_route=model_serving_container_health_route, - model_description=model_description, - staging_bucket=staging_bucket, ) - self._requirements = requirements - self._script_path = script_path + self._column_transformations = ( + column_transformations_utils.validate_and_get_column_transformations( + column_specs, + column_transformations, + ) + ) + + self._optimization_objective = optimization_objective + self._additional_experiments = [] + + @property + @classmethod + @abc.abstractmethod + def _model_type(cls) -> str: + """The type of forecasting model.""" + pass + + @property + @classmethod + @abc.abstractmethod + def _training_task_definition(cls) -> str: + """A GCS path to the YAML file that defines the training task. + + The definition files that can be used here are found in + gs://google-cloud- aiplatform/schema/trainingjob/definition/. 
+ """ + pass def run( self, - dataset: Optional[ - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ] = None, - annotation_schema_uri: Optional[str] = None, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - base_output_dir: Optional[str] = None, - service_account: Optional[str] = None, - network: Optional[str] = None, - bigquery_destination: Optional[str] = None, - args: Optional[List[Union[str, float, int]]] = None, - environment_variables: Optional[Dict[str, str]] = None, - replica_count: int = 1, - machine_type: str = "n1-standard-4", - accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", - accelerator_count: int = 0, - boot_disk_type: str = "pd-ssd", - boot_disk_size_gb: int = 100, - reduction_server_replica_count: int = 0, - reduction_server_machine_type: Optional[str] = None, - reduction_server_container_uri: Optional[str] = None, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, - training_filter_split: Optional[str] = None, - validation_filter_split: Optional[str] = None, - test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, - timeout: Optional[int] = None, - restart_job_on_worker_restart: bool = False, - enable_web_access: bool = False, - tensorboard: Optional[str] = None, - sync=True, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, create_request_timeout: Optional[float] = None, - ) -> Optional[models.Model]: - """Runs the custom training job. - - Distributed Training Support: - If replica count = 1 then one chief replica will be provisioned. If - replica_count > 1 the remainder will be provisioned as a worker replica pool. - ie: replica_count = 10 will result in 1 chief and 9 workers - All replicas have same machine_type, accelerator_type, and accelerator_count + ) -> models.Model: + """Runs the training job and returns a model. If training on a Vertex AI dataset, you can use one of the following split configurations: Data fraction splits: @@ -1858,17 +1744,6 @@ def run( decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% of data will be used for training, 10% for validation, and 10% for test. - Data filter splits: - Assigns input data to training, validation, and test sets - based on the given filters, data pieces not matched by any - filter are ignored. Currently only supported for Datasets - containing DataItems. - If any of the filters in this message are to match nothing, then - they can be set as '-' (the minus sign). - If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and - ``test_filter_split`` must be provided. - Supported only for unstructured Datasets. - Predefined splits: Assigns input data to training, validation, and test sets based on the value of a provided key. If using predefined splits, ``predefined_split_column_name`` must be provided. @@ -1882,48 +1757,125 @@ def run( Supported only for tabular Datasets. Args: - dataset ( - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ): - Vertex AI to fit this training against. Custom training script should - retrieve datasets through passed in environment variables uris: + dataset (datasets.TimeSeriesDataset): + Required. The dataset within the same Project from which data will be used to train the Model. The + Dataset must use schema compatible with Model being trained, + and what is compatible should be described in the used + TrainingPipeline's [training_task_definition] + [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + For time series Datasets, all their data is exported to + training, to pick and choose from. + target_column (str): + Required. Name of the column that the Model is to predict values for. This + column must be unavailable at forecast. + time_column (str): + Required. Name of the column that identifies time order in the time series. + This column must be available at forecast. + time_series_identifier_column (str): + Required. Name of the column that identifies the time series. + unavailable_at_forecast_columns (List[str]): + Required. Column names of columns that are unavailable at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is unknown before the forecast + (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns (List[str]): + Required. Column names of columns that are available at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is known at forecast. + forecast_horizon: (int): + Required. The amount of time into the future for which forecasted values for the target are + returned. Expressed in number of units defined by the [data_granularity_unit] and + [data_granularity_count] field. Inclusive. + data_granularity_unit (str): + Required. The data granularity unit. Accepted values are ``minute``, + ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_count (int): + Required. The number of data granularity units between data points in the training + data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other + values of [data_granularity_unit], must be 1. + predefined_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. 
The value of the key (either the label's value or + value in the column) must be one of {``TRAIN``, + ``VALIDATE``, ``TEST``}, and it defines to which set the + given piece of data is assigned. If for a piece of data the + key is not present or has an invalid value, that piece is + ignored by the pipeline. - os.environ["AIP_TRAINING_DATA_URI"] - os.environ["AIP_VALIDATION_DATA_URI"] - os.environ["AIP_TEST_DATA_URI"] + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. This column must be available at forecast. + time_series_attribute_columns (List[str]): + Optional. Column names that should be used as attribute columns. + Each column is constant within a time series. + context_window (int): + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also + known as "cold start"). Inclusive. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. - Additionally the dataset format is passed in as: + Expected format: + ``bq://::`` - os.environ["AIP_DATA_FORMAT"] - annotation_schema_uri (str): - Google Cloud Storage URI points to a YAML file describing - annotation schema. The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files - that can be used here are found in - gs://google-cloud-aiplatform/schema/dataset/annotation/, - note that the chosen schema must be consistent with - ``metadata`` - of the Dataset specified by - ``dataset_id``. + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` - Only Annotations that both match this schema and belong to - DataItems not ignored by the split method are used in - respectively training, validation or test role, depending on - the role of the DataItem they are on. + Applies only if [export_evaluated_data_items] is True. 
+ export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. - When used in conjunction with - ``annotations_filter``, - the Annotations used for training are filtered by both - ``annotations_filter`` - and - ``annotation_schema_uri``. + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles (List[float]): + Quantiles to use for the `minimize-quantile-loss` + [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in + this case. + + Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. + Each quantile must be unique. + validation_options (str): + Validation options for the data validation component. The available options are: + "fail-pipeline" - (default), will validate against the validation and fail the pipeline + if it fails. + "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours (int): + Optional. The train budget of creating this Model, expressed in milli node + hours i.e. 1,000 value in this field means 1 node hour. + The training cost of the model will not exceed this budget. The final + cost will be attempted to be close to the budget, though may end up + being (even) noticeably smaller - at the backend's discretion. This + especially may happen when further model training ceases to provide + any improvements. + If the budget is set to a value known to be insufficient to train a + Model for the given training set, the training won't be attempted and + will error. + The minimum value is 1000 and the maximum is 72000. model_display_name (str): - If the script produces a managed Vertex AI Model. The display name of + Optional. If the script produces a managed Vertex AI Model. The display name of the Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. @@ -1938,315 +1890,213 @@ def run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. - base_output_dir (str): - GCS output directory of job. If not provided a - timestamped directory in the staging directory will be used. - - Vertex AI sets the following environment variables when it runs your training code: - - - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ - - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ - - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ - - service_account (str): - Specifies the service account for workload run-as account. - Users submitting jobs must have act-as permission on this run-as account. - network (str): - The full name of the Compute Engine network to which the job - should be peered. For example, projects/12345/global/networks/myVPC. - Private services access must already be configured for the network. - If left unspecified, the job is not peered with any network. - bigquery_destination (str): - Provide this field if `dataset` is a BiqQuery dataset. - The BigQuery project location where the training data is to - be written to. In the given project a new dataset is created - with name - ``dataset___`` - where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. 
All - training input data will be written into that dataset. In - the dataset three tables will be created, ``training``, - ``validation`` and ``test``. - - - AIP_DATA_FORMAT = "bigquery". - - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" - - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" - args (List[Unions[str, int, float]]): - Command line arguments to be passed to the Python script. - environment_variables (Dict[str, str]): - Environment variables to be passed to the container. - Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - At most 10 environment variables can be specified. - The Name of the environment variable must be unique. - - environment_variables = { - 'MY_KEY': 'MY_VALUE' - } - replica_count (int): - The number of worker replicas. If replica count = 1 then one chief - replica will be provisioned. If replica_count > 1 the remainder will be - provisioned as a worker replica pool. - machine_type (str): - The type of machine to use for training. - accelerator_type (str): - Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, - NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, - NVIDIA_TESLA_T4 - accelerator_count (int): - The number of accelerators to attach to a worker replica. - boot_disk_type (str): - Type of the boot disk, default is `pd-ssd`. - Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or - `pd-standard` (Persistent Disk Hard Disk Drive). - boot_disk_size_gb (int): - Size in GB of the boot disk, default is 100GB. - boot disk size must be within the range of [100, 64000]. - reduction_server_replica_count (int): - The number of reduction server replicas, default is 0. - reduction_server_machine_type (str): - Optional. The type of machine to use for reduction server. - reduction_server_container_uri (str): - Optional. The Uri of the reduction server container image. - See details: https://cloud.google.com/vertex-ai/docs/training/distributed-training#reduce_training_time_with_reduction_server - training_fraction_split (float): - Optional. The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - Optional. The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split (float): - Optional. The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - training_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to train the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - validation_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to validate the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. 
This is ignored if Dataset is not provided. - test_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to test the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - predefined_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {``training``, - ``validation``, ``test``}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timestamp_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timeout (int): - The maximum job running time in seconds. The default is 7 days. - restart_job_on_worker_restart (bool): - Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by - distributed training jobs that are not resilient - to workers leaving and joining a job. - enable_web_access (bool): - Whether you want Vertex AI to enable interactive shell access - to training containers. - https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell - tensorboard (str): - Optional. The name of a Vertex AI - [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] - resource to which this CustomJob will upload Tensorboard - logs. Format: - ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` - - The training script should write Tensorboard to following Vertex AI environment - variable: - - AIP_TENSORBOARD_LOG_DIR - - `service_account` is required with provided `tensorboard`. - For more information on configuring your service account please visit: - https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training + additional_experiments (List[str]): + Optional. Additional experiment flags for the time series forcasting training. create_request_timeout (float): Optional. The timeout for the create request in seconds. + hierarchy_group_columns (List[str]): + Optional. A list of time series attribute column names that + define the time series hierarchy. Only one level of hierarchy is + supported, ex. ``region`` for a hierarchy of stores or + ``department`` for a hierarchy of products. If multiple columns + are specified, time series will be grouped by their combined + values, ex. (``blue``, ``large``) for ``color`` and ``size``, up + to 5 columns are accepted. If no group columns are specified, + all time series are considered to be part of the same group. + hierarchy_group_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + time series in the same hierarchy group. + hierarchy_temporal_total_weight (float): + Optional. 
The weight of the loss for predictions aggregated over + the horizon for a single time series. + hierarchy_group_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + both the horizon and time series in the same hierarchy group. + window_column (str): + Optional. Name of the column that should be used to filter input + rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding + window from that row. + window_stride_length (int): + Optional. Step length used to generate input examples. Every + ``window_stride_length`` rows will be used to generate a sliding + window. + window_max_count (int): + Optional. Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the + input data will be randomly sampled to hit the count. + holiday_regions (List[str]): + Optional. The geographical regions to use when creating holiday + features. This option is only allowed when data_granularity_unit + is ``day``. Acceptable values can come from any of the following + levels: + Top level: GLOBAL + Second level: continental regions + NA: North America + JAPAC: Japan and Asia Pacific + EMEA: Europe, the Middle East and Africa + LAC: Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. - Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. + + Raises: + RuntimeError: If Training job has already been run or is waiting to run. """ - worker_pool_specs, managed_model = self._prepare_and_validate_run( - model_display_name=model_display_name, - model_labels=model_labels, - replica_count=replica_count, - machine_type=machine_type, - accelerator_count=accelerator_count, - accelerator_type=accelerator_type, - boot_disk_type=boot_disk_type, - boot_disk_size_gb=boot_disk_size_gb, - reduction_server_replica_count=reduction_server_replica_count, - reduction_server_machine_type=reduction_server_machine_type, - ) - # make and copy package - python_packager = source_utils._TrainingScriptPythonPackager( - script_path=self._script_path, requirements=self._requirements - ) + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + + if self._is_waiting_to_run(): + raise RuntimeError( + f"{self.__class__._model_type} Forecasting Training is already " + "scheduled to run." + ) + + if self._has_run: + raise RuntimeError( + f"{self.__class__._model_type} Forecasting Training has " + "already run." 
+ ) + + if additional_experiments: + self._add_additional_experiments(additional_experiments) return self._run( - python_packager=python_packager, dataset=dataset, - annotation_schema_uri=annotation_schema_uri, - worker_pool_specs=worker_pool_specs, - managed_model=managed_model, - args=args, - environment_variables=environment_variables, - base_output_dir=base_output_dir, - service_account=service_account, - network=network, - bigquery_destination=bigquery_destination, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, - training_filter_split=training_filter_split, - validation_filter_split=validation_filter_split, - test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - timeout=timeout, - restart_job_on_worker_restart=restart_job_on_worker_restart, - enable_web_access=enable_web_access, - tensorboard=tensorboard, - reduction_server_container_uri=reduction_server_container_uri - if reduction_server_replica_count > 0 - else None, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, sync=sync, create_request_timeout=create_request_timeout, ) - @base.optional_sync(construct_object_on_arg="managed_model") + @base.optional_sync() def _run( self, - python_packager: source_utils._TrainingScriptPythonPackager, - dataset: Optional[ - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ], - annotation_schema_uri: Optional[str], - worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, - managed_model: Optional[gca_model.Model] = None, - args: Optional[List[Union[str, float, int]]] = None, - environment_variables: Optional[Dict[str, str]] = None, - base_output_dir: Optional[str] = None, - service_account: Optional[str] = None, - network: Optional[str] = None, - bigquery_destination: Optional[str] = None, - training_fraction_split: Optional[float] = None, - validation_fraction_split: Optional[float] = None, - test_fraction_split: Optional[float] = None, - training_filter_split: Optional[str] = None, - validation_filter_split: Optional[str] = None, - test_filter_split: Optional[str] = None, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, + training_fraction_split: Optional[float] = None, + 
validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, - timeout: Optional[int] = None, - restart_job_on_worker_restart: bool = False, - enable_web_access: bool = False, - tensorboard: Optional[str] = None, - reduction_server_container_uri: Optional[str] = None, - sync=True, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, create_request_timeout: Optional[float] = None, - ) -> Optional[models.Model]: - """Packages local script and launches training_job. - - Args: - python_packager (source_utils._TrainingScriptPythonPackager): - Required. Python Packager pointing to training script locally. - dataset ( - Union[ - datasets.ImageDataset, - datasets.TabularDataset, - datasets.TextDataset, - datasets.VideoDataset, - ] - ): - Vertex AI to fit this training against. - annotation_schema_uri (str): - Google Cloud Storage URI points to a YAML file describing - annotation schema. - worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): - Worker pools pecs required to run job. - managed_model (gca_model.Model): - Model proto if this script produces a Managed Model. - args (List[Unions[str, int, float]]): - Command line arguments to be passed to the Python script. - environment_variables (Dict[str, str]): - Environment variables to be passed to the container. - Should be a dictionary where keys are environment variable names - and values are environment variable values for those names. - At most 10 environment variables can be specified. - The Name of the environment variable must be unique. - - environment_variables = { - 'MY_KEY': 'MY_VALUE' - } - base_output_dir (str): - GCS output directory of job. If not provided a - timestamped directory in the staging directory will be used. + ) -> models.Model: + """Runs the training job and returns a model. - Vertex AI sets the following environment variables when it runs your training code: + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. - - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. 
/model/ - - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ - - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. - service_account (str): - Specifies the service account for workload run-as account. - Users submitting jobs must have act-as permission on this run-as account. - network (str): - The full name of the Compute Engine network to which the job - should be peered. For example, projects/12345/global/networks/myVPC. - Private services access must already be configured for the network. - If left unspecified, the job is not peered with any network. - bigquery_destination (str): - Provide this field if `dataset` is a BiqQuery dataset. - The BigQuery project location where the training data is to - be written to. In the given project a new dataset is created - with name - ``dataset___`` - where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All - training input data will be written into that dataset. In - the dataset three tables will be created, ``training``, - ``validation`` and ``test``. + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. - - AIP_DATA_FORMAT = "bigquery". - - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" - - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + Args: + dataset (datasets.TimeSeriesDataset): + Required. The dataset within the same Project from which data will be used to train the Model. The + Dataset must use schema compatible with Model being trained, + and what is compatible should be described in the used + TrainingPipeline's [training_task_definition] + [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + For time series Datasets, all their data is exported to + training, to pick and choose from. + target_column (str): + Required. Name of the column that the Model is to predict values for. This + column must be unavailable at forecast. + time_column (str): + Required. Name of the column that identifies time order in the time series. + This column must be available at forecast. + time_series_identifier_column (str): + Required. Name of the column that identifies the time series. + unavailable_at_forecast_columns (List[str]): + Required. Column names of columns that are unavailable at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is unknown before the forecast + (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns (List[str]): + Required. Column names of columns that are available at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is known at forecast. + forecast_horizon: (int): + Required. The amount of time into the future for which forecasted values for the target are + returned. 
Expressed in number of units defined by the [data_granularity_unit] and + [data_granularity_count] field. Inclusive. + data_granularity_unit (str): + Required. The data granularity unit. Accepted values are ``minute``, + ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_count (int): + Required. The number of data granularity units between data points in the training + data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other + values of [data_granularity_unit], must be 1. training_fraction_split (float): Optional. The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -2256,27 +2106,6 @@ def _run( test_fraction_split (float): Optional. The fraction of the input data that is to be used to evaluate the Model. This is ignored if Dataset is not provided. - training_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to train the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - validation_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to validate the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. - test_filter_split (str): - Optional. A filter on DataItems of the Dataset. DataItems that match - this filter are used to test the Model. A filter with same syntax - as the one used in DatasetService.ListDataItems may be used. If a - single DataItem is matched by more than one of the FilterSplit filters, - then it is assigned to the first set that applies to it in the training, - validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -2294,57 +2123,1080 @@ def _run( `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - timeout (int): - The maximum job running time in seconds. The default is 7 days. - restart_job_on_worker_restart (bool): - Restarts the entire CustomJob if a worker - gets restarted. This feature can be used by - distributed training jobs that are not resilient - to workers leaving and joining a job. - enable_web_access (bool): - Whether you want Vertex AI to enable interactive shell access - to training containers. - https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell - tensorboard (str): - Optional. The name of a Vertex AI - [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] - resource to which this CustomJob will upload Tensorboard - logs. 
Format: - ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` - - The training script should write Tensorboard to following Vertex AI environment - variable: - - AIP_TENSORBOARD_LOG_DIR + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. This column must be available at forecast. + time_series_attribute_columns (List[str]): + Optional. Column names that should be used as attribute columns. + Each column is constant within a time series. + context_window (int): + Optional. The number of periods offset into the past to restrict past sequence, where each + period is one unit of granularity as defined by [period]. When not provided uses the + default value of 0 which means the model sets each series historical window to be 0 (also + known as "cold start"). Inclusive. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. - `service_account` is required with provided `tensorboard`. - For more information on configuring your service account please visit: - https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training - reduction_server_container_uri (str): - Optional. The Uri of the reduction server container image. - sync (bool): - Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. - create_request_timeout (float) - Optional. The timeout for the create request in seconds + Expected format: + ``bq://::
`` - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - """ - package_gcs_uri = python_packager.package_and_copy_to_gcs( - gcs_staging_dir=self._staging_bucket, - project=self.project, - credentials=self.credentials, - ) + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` - for spec_order, spec in enumerate(worker_pool_specs): + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. - if not spec: - continue + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles (List[float]): + Quantiles to use for the `minimize-quantile-loss` + [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in + this case. + + Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. + Each quantile must be unique. + validation_options (str): + Validation options for the data validation component. The available options are: + "fail-pipeline" - (default), will validate against the validation and fail the pipeline + if it fails. + "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours (int): + Optional. The train budget of creating this Model, expressed in milli node + hours i.e. 1,000 value in this field means 1 node hour. + The training cost of the model will not exceed this budget. The final + cost will be attempted to be close to the budget, though may end up + being (even) noticeably smaller - at the backend's discretion. This + especially may happen when further model training ceases to provide + any improvements. + If the budget is set to a value known to be insufficient to train a + Model for the given training set, the training won't be attempted and + will error. + The minimum value is 1000 and the maximum is 72000. + model_display_name (str): + Optional. If the script produces a managed Vertex AI Model. The display name of + the Model. The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + + If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + hierarchy_group_columns (List[str]): + Optional. A list of time series attribute column names that + define the time series hierarchy. Only one level of hierarchy is + supported, ex. ``region`` for a hierarchy of stores or + ``department`` for a hierarchy of products. If multiple columns + are specified, time series will be grouped by their combined + values, ex. (``blue``, ``large``) for ``color`` and ``size``, up + to 5 columns are accepted. If no group columns are specified, + all time series are considered to be part of the same group. + hierarchy_group_total_weight (float): + Optional. 
The weight of the loss for predictions aggregated over + time series in the same hierarchy group. + hierarchy_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + the horizon for a single time series. + hierarchy_group_temporal_total_weight (float): + Optional. The weight of the loss for predictions aggregated over + both the horizon and time series in the same hierarchy group. + window_column (str): + Optional. Name of the column that should be used to filter input + rows. The column should contain either booleans or string + booleans; if the value of the row is True, generate a sliding + window from that row. + window_stride_length (int): + Optional. Step length used to generate input examples. Every + ``window_stride_length`` rows will be used to generate a sliding + window. + window_max_count (int): + Optional. Number of rows that should be used to generate input + examples. If the total row count is larger than this number, the + input data will be randomly sampled to hit the count. + holiday_regions (List[str]): + Optional. The geographical regions to use when creating holiday + features. This option is only allowed when data_granularity_unit + is ``day``. Acceptable values can come from any of the following + levels: + Top level: GLOBAL + Second level: continental regions + NA: North America + JAPAC: Japan and Asia Pacific + EMEA: Europe, the Middle East and Africa + LAC: Latin America and the Caribbean + Third level: countries from ISO 3166-1 Country codes. + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. + """ + # auto-populate transformations + if self._column_transformations is None: + _LOGGER.info( + "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." + ) + + ( + self._column_transformations, + column_names, + ) = dataset._get_default_column_transformations(target_column) + + _LOGGER.info( + "The column transformation of type 'auto' was set for the following columns: %s." + % column_names + ) + + window_config = self._create_window_config( + column=window_column, + stride_length=window_stride_length, + max_count=window_max_count, + ) + + training_task_inputs_dict = { + # required inputs + "targetColumn": target_column, + "timeColumn": time_column, + "timeSeriesIdentifierColumn": time_series_identifier_column, + "timeSeriesAttributeColumns": time_series_attribute_columns, + "unavailableAtForecastColumns": unavailable_at_forecast_columns, + "availableAtForecastColumns": available_at_forecast_columns, + "forecastHorizon": forecast_horizon, + "dataGranularity": { + "unit": data_granularity_unit, + "quantity": data_granularity_count, + }, + "transformations": self._column_transformations, + "trainBudgetMilliNodeHours": budget_milli_node_hours, + # optional inputs + "weightColumn": weight_column, + "contextWindow": context_window, + "quantiles": quantiles, + "validationOptions": validation_options, + "optimizationObjective": self._optimization_objective, + "holidayRegions": holiday_regions, + } + + # TODO(TheMichaelHu): Remove the ifs once the API supports these inputs. 
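        # Illustrative sketch with hypothetical values: if a caller passed
        # hierarchy_group_columns=["region"] and hierarchy_group_total_weight=1.0,
        # the conditional below would add
        #     {"groupColumns": ["region"], "groupTotalWeight": 1.0,
        #      "temporalTotalWeight": None, "groupTemporalTotalWeight": None}
        # under the "hierarchyConfig" key of training_task_inputs_dict.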
+ if any( + [ + hierarchy_group_columns, + hierarchy_group_total_weight, + hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight, + ] + ): + training_task_inputs_dict["hierarchyConfig"] = { + "groupColumns": hierarchy_group_columns, + "groupTotalWeight": hierarchy_group_total_weight, + "temporalTotalWeight": hierarchy_temporal_total_weight, + "groupTemporalTotalWeight": hierarchy_group_temporal_total_weight, + } + if window_config: + training_task_inputs_dict["windowConfig"] = window_config + + final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri + if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( + "bq://" + ): + final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" + + if export_evaluated_data_items: + training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { + "destinationBigqueryUri": final_export_eval_bq_uri, + "overrideExistingTable": export_evaluated_data_items_override_destination, + } + + if self._additional_experiments: + training_task_inputs_dict[ + "additionalExperiments" + ] = self._additional_experiments + + model = gca_model.Model( + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, + encryption_spec=self._model_encryption_spec, + ) + + new_model = self._run_job( + training_task_definition=self.__class__._training_task_definition, + training_task_inputs=training_task_inputs_dict, + dataset=dataset, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + model=model, + create_request_timeout=create_request_timeout, + ) + + if export_evaluated_data_items: + _LOGGER.info( + "Exported examples available at:\n%s" + % self.evaluated_data_items_bigquery_uri + ) + + return new_model + + @property + def _model_upload_fail_string(self) -> str: + """Helper property for model upload failure.""" + return ( + f"Training Pipeline {self.resource_name} is not configured to upload a " + "Model." + ) + + @property + def evaluated_data_items_bigquery_uri(self) -> Optional[str]: + """BigQuery location of exported evaluated examples from the Training Job + Returns: + str: BigQuery uri for the exported evaluated examples if the export + feature is enabled for training. + None: If the export feature was not enabled for training. + """ + return super().evaluated_data_items_bigquery_uri + + def _add_additional_experiments(self, additional_experiments: List[str]): + """Add experiment flags to the training job. + Args: + additional_experiments (List[str]): + Experiment flags that can enable some experimental training features. + """ + self._additional_experiments.extend(additional_experiments) + + @staticmethod + def _create_window_config( + column: Optional[str] = None, + stride_length: Optional[int] = None, + max_count: Optional[int] = None, + ) -> Optional[Dict[str, Union[int, str]]]: + """Creates a window config from training job arguments.""" + configs = { + "column": column, + "strideLength": stride_length, + "maxCount": max_count, + } + present_configs = {k: v for k, v in configs.items() if v is not None} + if not present_configs: + return None + if len(present_configs) > 1: + raise ValueError( + "More than one windowing strategy provided. Make sure only one " + "of window_column, window_stride_length, or window_max_count " + "is specified." 
+ ) + return present_configs + + +# TODO(b/172368325) add scheduling, custom_job.Scheduling +class CustomTrainingJob(_CustomTrainingJob): + """Class to launch a Custom Training Job in Vertex AI using a script. + + Takes a training implementation as a python script and executes that + script in Cloud Vertex AI Training. + """ + + def __init__( + self, + # TODO(b/223262536): Make display_name parameter fully optional in next major release + display_name: str, + script_path: str, + container_uri: str, + requirements: Optional[Sequence[str]] = None, + model_serving_container_image_uri: Optional[str] = None, + model_serving_container_predict_route: Optional[str] = None, + model_serving_container_health_route: Optional[str] = None, + model_serving_container_command: Optional[Sequence[str]] = None, + model_serving_container_args: Optional[Sequence[str]] = None, + model_serving_container_environment_variables: Optional[Dict[str, str]] = None, + model_serving_container_ports: Optional[Sequence[int]] = None, + model_description: Optional[str] = None, + model_instance_schema_uri: Optional[str] = None, + model_parameters_schema_uri: Optional[str] = None, + model_prediction_schema_uri: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + training_encryption_spec_key_name: Optional[str] = None, + model_encryption_spec_key_name: Optional[str] = None, + staging_bucket: Optional[str] = None, + ): + """Constructs a Custom Training Job from a Python script. + + job = aiplatform.CustomTrainingJob( + display_name='test-train', + script_path='test_script.py', + requirements=['pandas', 'numpy'], + container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', + model_serving_container_image_uri='gcr.io/my-trainer/serving:1', + model_serving_container_predict_route='predict', + model_serving_container_health_route='metadata, + labels={'key': 'value'}, + ) + + Usage with Dataset: + + ds = aiplatform.TabularDataset( + 'projects/my-project/locations/us-central1/datasets/12345') + + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) + + Usage without Dataset: + + job.run(replica_count=1, model_display_name='my-trained-model) + + + TODO(b/169782082) add documentation about traning utilities + To ensure your model gets saved in Vertex AI, write your saved model to + os.environ["AIP_MODEL_DIR"] in your provided training script. + + + Args: + display_name (str): + Required. The user-defined name of this TrainingPipeline. + script_path (str): Required. Local path to training script. + container_uri (str): + Required: Uri of the training container image in the GCR. + requirements (Sequence[str]): + List of python packages dependencies of script. + model_serving_container_image_uri (str): + If the training produces a managed Vertex AI Model, the URI of the + Model serving container suitable for serving the model produced by the + training script. + model_serving_container_predict_route (str): + If the training produces a managed Vertex AI Model, An HTTP path to + send prediction requests to the container, and which must be supported + by it. If not specified a default HTTP path will be used by Vertex AI. + model_serving_container_health_route (str): + If the training produces a managed Vertex AI Model, an HTTP path to + send health check requests to the container, and which must be supported + by it. 
If not specified a standard HTTP path will be used by AI + Platform. + model_serving_container_command (Sequence[str]): + The command with which the container is run. Not executed within a + shell. The Docker image's ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container's + environment. If a variable cannot be resolved, the reference in the + input string will be unchanged. The $(VAR_NAME) syntax can be escaped + with a double $$, ie: $$(VAR_NAME). Escaped references will never be + expanded, regardless of whether the variable exists or not. + model_serving_container_args (Sequence[str]): + The arguments to the command. The Docker image's CMD is used if this is + not provided. Variable references $(VAR_NAME) are expanded using the + container's environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax can be + escaped with a double $$, ie: $$(VAR_NAME). Escaped references will + never be expanded, regardless of whether the variable exists or not. + model_serving_container_environment_variables (Dict[str, str]): + The environment variables that are to be present in the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. + model_serving_container_ports (Sequence[int]): + Declaration of ports that are exposed by the container. This field is + primarily informational, it gives Vertex AI information about the + network connections the container uses. Listing or not a port here has + no impact on whether the port is actually exposed, any port listening on + the default "0.0.0.0" address inside a container will be accessible from + the network. + model_description (str): + The description of the Model. + model_instance_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single instance, which + are used in + ``PredictRequest.instances``, + ``ExplainRequest.instances`` + and + ``BatchPredictionJob.input_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + model_parameters_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the parameters of prediction and + explanation via + ``PredictRequest.parameters``, + ``ExplainRequest.parameters`` + and + ``BatchPredictionJob.model_parameters``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. + AutoML Models always have this field populated by AI + Platform, if no parameters are supported it is set to an + empty string. Note: The URI given on output will be + immutable and probably different, including the URI scheme, + than the one given on input. The output URI will point to a + location where the user only has a read access. + model_prediction_schema_uri (str): + Optional. Points to a YAML file stored on Google Cloud + Storage describing the format of a single prediction + produced by this Model, which are returned via + ``PredictResponse.predictions``, + ``ExplainResponse.explanations``, + and + ``BatchPredictionJob.output_config``. + The schema is defined as an OpenAPI 3.0.2 `Schema + Object `__. 
+ AutoML Models always have this field populated by AI + Platform. Note: The URI given on output will be immutable + and probably different, including the URI scheme, than the + one given on input. The output URI will point to a location + where the user only has a read access. + project (str): + Project to run training in. Overrides project set in aiplatform.init. + location (str): + Location to run training in. Overrides location set in aiplatform.init. + credentials (auth_credentials.Credentials): + Custom credentials to use to run call training service. Overrides + credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + training_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the training pipeline. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this TrainingPipeline will be secured by this key. + + Note: Model trained by this TrainingPipeline is also secured + by this key if ``model_to_upload`` is not set separately. + + Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, the trained Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + staging_bucket (str): + Bucket used to stage source and training artifacts. Overrides + staging_bucket set in aiplatform.init. 
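            A minimal construction sketch (the bucket and key names below are hypothetical):

                job = aiplatform.CustomTrainingJob(
                    display_name='test-train',
                    script_path='test_script.py',
                    container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest',
                    staging_bucket='gs://my-staging-bucket',
                    training_encryption_spec_key_name=(
                        'projects/my-project/locations/us-central1/'
                        'keyRings/my-kr/cryptoKeys/my-key'
                    ),
                )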
+ """ + if not display_name: + display_name = self.__class__._generate_display_name() + super().__init__( + display_name=display_name, + project=project, + location=location, + credentials=credentials, + labels=labels, + training_encryption_spec_key_name=training_encryption_spec_key_name, + model_encryption_spec_key_name=model_encryption_spec_key_name, + container_uri=container_uri, + model_instance_schema_uri=model_instance_schema_uri, + model_parameters_schema_uri=model_parameters_schema_uri, + model_prediction_schema_uri=model_prediction_schema_uri, + model_serving_container_environment_variables=model_serving_container_environment_variables, + model_serving_container_ports=model_serving_container_ports, + model_serving_container_image_uri=model_serving_container_image_uri, + model_serving_container_command=model_serving_container_command, + model_serving_container_args=model_serving_container_args, + model_serving_container_predict_route=model_serving_container_predict_route, + model_serving_container_health_route=model_serving_container_health_route, + model_description=model_description, + staging_bucket=staging_bucket, + ) + + self._requirements = requirements + self._script_path = script_path + + def run( + self, + dataset: Optional[ + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ] = None, + annotation_schema_uri: Optional[str] = None, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + base_output_dir: Optional[str] = None, + service_account: Optional[str] = None, + network: Optional[str] = None, + bigquery_destination: Optional[str] = None, + args: Optional[List[Union[str, float, int]]] = None, + environment_variables: Optional[Dict[str, str]] = None, + replica_count: int = 1, + machine_type: str = "n1-standard-4", + accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", + accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, + reduction_server_replica_count: int = 0, + reduction_server_machine_type: Optional[str] = None, + reduction_server_container_uri: Optional[str] = None, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + timeout: Optional[int] = None, + restart_job_on_worker_restart: bool = False, + enable_web_access: bool = False, + tensorboard: Optional[str] = None, + sync=True, + create_request_timeout: Optional[float] = None, + ) -> Optional[models.Model]: + """Runs the custom training job. + + Distributed Training Support: + If replica count = 1 then one chief replica will be provisioned. If + replica_count > 1 the remainder will be provisioned as a worker replica pool. + ie: replica_count = 10 will result in 1 chief and 9 workers + All replicas have same machine_type, accelerator_type, and accelerator_count + + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. + + Args: + dataset ( + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ): + Vertex AI to fit this training against. Custom training script should + retrieve datasets through passed in environment variables uris: + + os.environ["AIP_TRAINING_DATA_URI"] + os.environ["AIP_VALIDATION_DATA_URI"] + os.environ["AIP_TEST_DATA_URI"] + + Additionally the dataset format is passed in as: + + os.environ["AIP_DATA_FORMAT"] + annotation_schema_uri (str): + Google Cloud Storage URI points to a YAML file describing + annotation schema. The schema is defined as an OpenAPI 3.0.2 + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files + that can be used here are found in + gs://google-cloud-aiplatform/schema/dataset/annotation/, + note that the chosen schema must be consistent with + ``metadata`` + of the Dataset specified by + ``dataset_id``. + + Only Annotations that both match this schema and belong to + DataItems not ignored by the split method are used in + respectively training, validation or test role, depending on + the role of the DataItem they are on. + + When used in conjunction with + ``annotations_filter``, + the Annotations used for training are filtered by both + ``annotations_filter`` + and + ``annotation_schema_uri``. + model_display_name (str): + If the script produces a managed Vertex AI Model. The display name of + the Model. The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + + If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + base_output_dir (str): + GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. + + Vertex AI sets the following environment variables when it runs your training code: + + - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. 
/model/ + - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ + - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ + + service_account (str): + Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + bigquery_destination (str): + Provide this field if `dataset` is a BiqQuery dataset. + The BigQuery project location where the training data is to + be written to. In the given project a new dataset is created + with name + ``dataset___`` + where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All + training input data will be written into that dataset. In + the dataset three tables will be created, ``training``, + ``validation`` and ``test``. + + - AIP_DATA_FORMAT = "bigquery". + - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" + - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" + - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + args (List[Unions[str, int, float]]): + Command line arguments to be passed to the Python script. + environment_variables (Dict[str, str]): + Environment variables to be passed to the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. + At most 10 environment variables can be specified. + The Name of the environment variable must be unique. + + environment_variables = { + 'MY_KEY': 'MY_VALUE' + } + replica_count (int): + The number of worker replicas. If replica count = 1 then one chief + replica will be provisioned. If replica_count > 1 the remainder will be + provisioned as a worker replica pool. + machine_type (str): + The type of machine to use for training. + accelerator_type (str): + Hardware accelerator type. One of ACCELERATOR_TYPE_UNSPECIFIED, + NVIDIA_TESLA_K80, NVIDIA_TESLA_P100, NVIDIA_TESLA_V100, NVIDIA_TESLA_P4, + NVIDIA_TESLA_T4 + accelerator_count (int): + The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. + reduction_server_replica_count (int): + The number of reduction server replicas, default is 0. + reduction_server_machine_type (str): + Optional. The type of machine to use for reduction server. + reduction_server_container_uri (str): + Optional. The Uri of the reduction server container image. + See details: https://cloud.google.com/vertex-ai/docs/training/distributed-training#reduce_training_time_with_reduction_server + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. 
The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + predefined_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key (either the label's value or + value in the column) must be one of {``training``, + ``validation``, ``test``}, and it defines to which set the + given piece of data is assigned. If for a piece of data the + key is not present or has an invalid value, that piece is + ignored by the pipeline. + + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + + Supported only for tabular and time series Datasets. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + enable_web_access (bool): + Whether you want Vertex AI to enable interactive shell access + to training containers. + https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell + tensorboard (str): + Optional. The name of a Vertex AI + [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] + resource to which this CustomJob will upload Tensorboard + logs. Format: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` + + The training script should write Tensorboard to following Vertex AI environment + variable: + + AIP_TENSORBOARD_LOG_DIR + + `service_account` is required with provided `tensorboard`. 
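                A minimal sketch of such a call (resource names are hypothetical):

                    job.run(
                        replica_count=1,
                        model_display_name='my-trained-model',
                        tensorboard='projects/123/locations/us-central1/tensorboards/456',
                        service_account='my-sa@my-project.iam.gserviceaccount.com',
                    )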
+ For more information on configuring your service account please visit: + https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training + create_request_timeout (float): + Optional. The timeout for the create request in seconds. + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. + """ + worker_pool_specs, managed_model = self._prepare_and_validate_run( + model_display_name=model_display_name, + model_labels=model_labels, + replica_count=replica_count, + machine_type=machine_type, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, + reduction_server_replica_count=reduction_server_replica_count, + reduction_server_machine_type=reduction_server_machine_type, + ) + + # make and copy package + python_packager = source_utils._TrainingScriptPythonPackager( + script_path=self._script_path, requirements=self._requirements + ) + + return self._run( + python_packager=python_packager, + dataset=dataset, + annotation_schema_uri=annotation_schema_uri, + worker_pool_specs=worker_pool_specs, + managed_model=managed_model, + args=args, + environment_variables=environment_variables, + base_output_dir=base_output_dir, + service_account=service_account, + network=network, + bigquery_destination=bigquery_destination, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + timeout=timeout, + restart_job_on_worker_restart=restart_job_on_worker_restart, + enable_web_access=enable_web_access, + tensorboard=tensorboard, + reduction_server_container_uri=reduction_server_container_uri + if reduction_server_replica_count > 0 + else None, + sync=sync, + create_request_timeout=create_request_timeout, + ) + + @base.optional_sync(construct_object_on_arg="managed_model") + def _run( + self, + python_packager: source_utils._TrainingScriptPythonPackager, + dataset: Optional[ + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ], + annotation_schema_uri: Optional[str], + worker_pool_specs: worker_spec_utils._DistributedTrainingSpec, + managed_model: Optional[gca_model.Model] = None, + args: Optional[List[Union[str, float, int]]] = None, + environment_variables: Optional[Dict[str, str]] = None, + base_output_dir: Optional[str] = None, + service_account: Optional[str] = None, + network: Optional[str] = None, + bigquery_destination: Optional[str] = None, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + timeout: Optional[int] = None, + restart_job_on_worker_restart: bool = False, + 
enable_web_access: bool = False, + tensorboard: Optional[str] = None, + reduction_server_container_uri: Optional[str] = None, + sync=True, + create_request_timeout: Optional[float] = None, + ) -> Optional[models.Model]: + """Packages local script and launches training_job. + + Args: + python_packager (source_utils._TrainingScriptPythonPackager): + Required. Python Packager pointing to training script locally. + dataset ( + Union[ + datasets.ImageDataset, + datasets.TabularDataset, + datasets.TextDataset, + datasets.VideoDataset, + ] + ): + Vertex AI to fit this training against. + annotation_schema_uri (str): + Google Cloud Storage URI points to a YAML file describing + annotation schema. + worker_pools_spec (worker_spec_utils._DistributedTrainingSpec): + Worker pools pecs required to run job. + managed_model (gca_model.Model): + Model proto if this script produces a Managed Model. + args (List[Unions[str, int, float]]): + Command line arguments to be passed to the Python script. + environment_variables (Dict[str, str]): + Environment variables to be passed to the container. + Should be a dictionary where keys are environment variable names + and values are environment variable values for those names. + At most 10 environment variables can be specified. + The Name of the environment variable must be unique. + + environment_variables = { + 'MY_KEY': 'MY_VALUE' + } + base_output_dir (str): + GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. + + Vertex AI sets the following environment variables when it runs your training code: + + - AIP_MODEL_DIR: a Cloud Storage URI of a directory intended for saving model artifacts, i.e. /model/ + - AIP_CHECKPOINT_DIR: a Cloud Storage URI of a directory intended for saving checkpoints, i.e. /checkpoints/ + - AIP_TENSORBOARD_LOG_DIR: a Cloud Storage URI of a directory intended for saving TensorBoard logs, i.e. /logs/ + + service_account (str): + Specifies the service account for workload run-as account. + Users submitting jobs must have act-as permission on this run-as account. + network (str): + The full name of the Compute Engine network to which the job + should be peered. For example, projects/12345/global/networks/myVPC. + Private services access must already be configured for the network. + If left unspecified, the job is not peered with any network. + bigquery_destination (str): + Provide this field if `dataset` is a BiqQuery dataset. + The BigQuery project location where the training data is to + be written to. In the given project a new dataset is created + with name + ``dataset___`` + where timestamp is in YYYY_MM_DDThh_mm_ss_sssZ format. All + training input data will be written into that dataset. In + the dataset three tables will be created, ``training``, + ``validation`` and ``test``. + + - AIP_DATA_FORMAT = "bigquery". + - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" + - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" + - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. 
This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + predefined_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key (either the label's value or + value in the column) must be one of {``training``, + ``validation``, ``test``}, and it defines to which set the + given piece of data is assigned. If for a piece of data the + key is not present or has an invalid value, that piece is + ignored by the pipeline. + + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + + Supported only for tabular and time series Datasets. + timeout (int): + The maximum job running time in seconds. The default is 7 days. + restart_job_on_worker_restart (bool): + Restarts the entire CustomJob if a worker + gets restarted. This feature can be used by + distributed training jobs that are not resilient + to workers leaving and joining a job. + enable_web_access (bool): + Whether you want Vertex AI to enable interactive shell access + to training containers. + https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell + tensorboard (str): + Optional. The name of a Vertex AI + [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard] + resource to which this CustomJob will upload Tensorboard + logs. Format: + ``projects/{project}/locations/{location}/tensorboards/{tensorboard}`` + + The training script should write Tensorboard to following Vertex AI environment + variable: + + AIP_TENSORBOARD_LOG_DIR + + `service_account` is required with provided `tensorboard`. + For more information on configuring your service account please visit: + https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training + reduction_server_container_uri (str): + Optional. The Uri of the reduction server container image. 
+ sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + create_request_timeout (float) + Optional. The timeout for the create request in seconds + + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. + """ + package_gcs_uri = python_packager.package_and_copy_to_gcs( + gcs_staging_dir=self._staging_bucket, + project=self.project, + credentials=self.credentials, + ) + + for spec_order, spec in enumerate(worker_pool_specs): + + if not spec: + continue if ( spec_order == worker_spec_utils._SPEC_ORDERS["server_spec"] @@ -3394,217 +4246,22 @@ def __init__( model_encryption_spec_key_name=model_encryption_spec_key_name, ) - self._column_transformations = ( - column_transformations_utils.validate_and_get_column_transformations( - column_specs, column_transformations - ) - ) - - self._optimization_objective = optimization_objective - self._optimization_prediction_type = optimization_prediction_type - self._optimization_objective_recall_value = optimization_objective_recall_value - self._optimization_objective_precision_value = ( - optimization_objective_precision_value - ) - - self._additional_experiments = [] - - def run( - self, - dataset: datasets.TabularDataset, - target_column: str, - training_fraction_split: Optional[float] = None, - validation_fraction_split: Optional[float] = None, - test_fraction_split: Optional[float] = None, - predefined_split_column_name: Optional[str] = None, - timestamp_split_column_name: Optional[str] = None, - weight_column: Optional[str] = None, - budget_milli_node_hours: int = 1000, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - disable_early_stopping: bool = False, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - additional_experiments: Optional[List[str]] = None, - sync: bool = True, - create_request_timeout: Optional[float] = None, - ) -> models.Model: - """Runs the training job and returns a model. - - If training on a Vertex AI dataset, you can use one of the following split configurations: - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. - Supported only for tabular Datasets. - - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - Supported only for tabular Datasets. - - Args: - dataset (datasets.TabularDataset): - Required. The dataset within the same Project from which data will be used to train the Model. 
The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For tabular Datasets, all their data is exported to - training, to pick and choose from. - target_column (str): - Required. The name of the column values of which the Model is to predict. - training_fraction_split (float): - Optional. The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - Optional. The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split (float): - Optional. The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. - predefined_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {``training``, - ``validation``, ``test``}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timestamp_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. - budget_milli_node_hours (int): - Optional. The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name (str): - Optional. If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - - If not provided upon creation, the job's display_name is used. - model_labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize your Models. 
- Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - disable_early_stopping (bool): - Required. If true, the entire budget is used. This disables the early stopping - feature. By default, the early stopping feature is enabled, which means - that training might stop before the entire training budget has been - used, if further training does no longer brings significant improvement - to the model. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - additional_experiments (List[str]): - Optional. Additional experiment flags for the automl tables training. - sync (bool): - Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - - Raises: - RuntimeError: If Training job has already been run or is waiting to run. - """ - if model_display_name: - utils.validate_display_name(model_display_name) - if model_labels: - utils.validate_labels(model_labels) - - if self._is_waiting_to_run(): - raise RuntimeError("AutoML Tabular Training is already scheduled to run.") - - if self._has_run: - raise RuntimeError("AutoML Tabular Training has already run.") - - if additional_experiments: - self._add_additional_experiments(additional_experiments) - - return self._run( - dataset=dataset, - target_column=target_column, - training_fraction_split=training_fraction_split, - validation_fraction_split=validation_fraction_split, - test_fraction_split=test_fraction_split, - predefined_split_column_name=predefined_split_column_name, - timestamp_split_column_name=timestamp_split_column_name, - weight_column=weight_column, - budget_milli_node_hours=budget_milli_node_hours, - model_display_name=model_display_name, - model_labels=model_labels, - disable_early_stopping=disable_early_stopping, - export_evaluated_data_items=export_evaluated_data_items, - export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, - export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, - sync=sync, - create_request_timeout=create_request_timeout, - ) - - @base.optional_sync() - def _run( + self._column_transformations = ( + column_transformations_utils.validate_and_get_column_transformations( + column_specs, column_transformations + ) + ) + + self._optimization_objective = optimization_objective + self._optimization_prediction_type = optimization_prediction_type + self._optimization_objective_recall_value = optimization_objective_recall_value + self._optimization_objective_precision_value = ( + optimization_objective_precision_value + ) + + self._additional_experiments = [] + + def run( self, dataset: datasets.TabularDataset, target_column: str, @@ -3621,6 +4278,7 @@ def _run( export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, export_evaluated_data_items_override_destination: bool = False, + additional_experiments: Optional[List[str]] = None, sync: bool = True, create_request_timeout: Optional[float] = None, ) -> models.Model: @@ -3730,321 +4388,91 @@ def _run( 
export_evaluated_data_items (bool): Whether to export the test set predictions to a BigQuery table. If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - sync (bool): - Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - """ - - training_task_definition = schema.training_job.definition.automl_tabular - - # auto-populate transformations - if self._column_transformations is None: - _LOGGER.info( - "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." - ) - - ( - self._column_transformations, - column_names, - ) = column_transformations_utils.get_default_column_transformations( - dataset=dataset, target_column=target_column - ) - - _LOGGER.info( - "The column transformation of type 'auto' was set for the following columns: %s." - % column_names - ) - - training_task_inputs_dict = { - # required inputs - "targetColumn": target_column, - "transformations": self._column_transformations, - "trainBudgetMilliNodeHours": budget_milli_node_hours, - # optional inputs - "weightColumnName": weight_column, - "disableEarlyStopping": disable_early_stopping, - "optimizationObjective": self._optimization_objective, - "predictionType": self._optimization_prediction_type, - "optimizationObjectiveRecallValue": self._optimization_objective_recall_value, - "optimizationObjectivePrecisionValue": self._optimization_objective_precision_value, - } - - final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri - if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( - "bq://" - ): - final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" - - if export_evaluated_data_items: - training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { - "destinationBigqueryUri": final_export_eval_bq_uri, - "overrideExistingTable": export_evaluated_data_items_override_destination, - } - - if self._additional_experiments: - training_task_inputs_dict[ - "additionalExperiments" - ] = self._additional_experiments - - model = gca_model.Model( - display_name=model_display_name or self._display_name, - labels=model_labels or self._labels, - encryption_spec=self._model_encryption_spec, - ) - - return self._run_job( - training_task_definition=training_task_definition, - training_task_inputs=training_task_inputs_dict, - dataset=dataset, - training_fraction_split=training_fraction_split, - validation_fraction_split=validation_fraction_split, - test_fraction_split=test_fraction_split, - predefined_split_column_name=predefined_split_column_name, - timestamp_split_column_name=timestamp_split_column_name, - model=model, - create_request_timeout=create_request_timeout, - ) - - @property - def 
_model_upload_fail_string(self) -> str: - """Helper property for model upload failure.""" - return ( - f"Training Pipeline {self.resource_name} is not configured to upload a " - "Model." - ) - - def _add_additional_experiments(self, additional_experiments: List[str]): - """Add experiment flags to the training job. - Args: - additional_experiments (List[str]): - Experiment flags that can enable some experimental training features. - """ - self._additional_experiments.extend(additional_experiments) - - @staticmethod - def get_auto_column_specs( - dataset: datasets.TabularDataset, - target_column: str, - ) -> Dict[str, str]: - """Returns a dict with all non-target columns as keys and 'auto' as values. - - Example usage: - - column_specs = training_jobs.AutoMLTabularTrainingJob.get_auto_column_specs( - dataset=my_dataset, - target_column="my_target_column", - ) - - Args: - dataset (datasets.TabularDataset): - Required. Intended dataset. - target_column(str): - Required. Intended target column. - Returns: - Dict[str, str] - Column names as keys and 'auto' as values - """ - column_names = [ - column for column in dataset.column_names if column != target_column - ] - column_specs = {column: "auto" for column in column_names} - return column_specs - - class column_data_types: - AUTO = "auto" - NUMERIC = "numeric" - CATEGORICAL = "categorical" - TIMESTAMP = "timestamp" - TEXT = "text" - REPEATED_NUMERIC = "repeated_numeric" - REPEATED_CATEGORICAL = "repeated_categorical" - REPEATED_TEXT = "repeated_text" - - -class AutoMLForecastingTrainingJob(_TrainingJob): - _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) - - def __init__( - self, - display_name: Optional[str] = None, - optimization_objective: Optional[str] = None, - column_specs: Optional[Dict[str, str]] = None, - column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, - project: Optional[str] = None, - location: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, - labels: Optional[Dict[str, str]] = None, - training_encryption_spec_key_name: Optional[str] = None, - model_encryption_spec_key_name: Optional[str] = None, - ): - """Constructs a AutoML Forecasting Training Job. - - Args: - display_name (str): - Optional. The user-defined name of this TrainingPipeline. - optimization_objective (str): - Optional. Objective function the model is to be optimized towards. - The training process creates a Model that optimizes the value of the objective - function over the validation set. The supported optimization objectives: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). - "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) - and mean-absolute-error (MAE). - "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. - (Set this objective to build quantile forecasts.) - column_specs (Dict[str, str]): - Optional. Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. 
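                For example, a minimal column_specs sketch (column names are hypothetical):

                    column_specs = {
                        "date": "timestamp",
                        "store_id": "categorical",
                        "promo_budget": "numeric",
                    }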
- If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - column_transformations (List[Dict[str, Dict[str, str]]]): - Optional. Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated eventually. - project (str): - Optional. Project to run training in. Overrides project set in aiplatform.init. - location (str): - Optional. Location to run training in. Overrides location set in aiplatform.init. - credentials (auth_credentials.Credentials): - Optional. Custom credentials to use to run call training service. Overrides - credentials set in aiplatform.init. - labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. - - If set, this TrainingPipeline will be secured by this key. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. - Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. + Expected format: + ``bq://::
`` - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` - If set, the trained Model will be secured by this key. + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. - Overrides encryption_spec_key_name set in aiplatform.init. + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + additional_experiments (List[str]): + Optional. Additional experiment flags for the automl tables training. + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. Raises: - ValueError: If both column_transformations and column_specs were provided. + RuntimeError: If Training job has already been run or is waiting to run. 
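Example (an illustrative sketch of how the export arguments above combine; the job construction is abbreviated, and the project, dataset, columns and BigQuery table are placeholders rather than values from this patch):

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")

    # Hypothetical job and dataset; the export arguments are the point here.
    job = aiplatform.AutoMLTabularTrainingJob(
        display_name="tabular-train",
        optimization_prediction_type="regression",
    )
    dataset = aiplatform.TabularDataset(
        "projects/my-project/locations/us-central1/datasets/1234567890"
    )

    model = job.run(
        dataset=dataset,
        target_column="sales",
        budget_milli_node_hours=1000,
        export_evaluated_data_items=True,
        # Follows the expected format above; a URI without the "bq://" prefix
        # is also accepted and the prefix is prepended before submission.
        export_evaluated_data_items_bigquery_destination_uri="bq://my-project:my_dataset:eval_items",
        export_evaluated_data_items_override_destination=True,
        sync=True,
    )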
""" - if not display_name: - display_name = self.__class__._generate_display_name() - super().__init__( - display_name=display_name, - project=project, - location=location, - credentials=credentials, - labels=labels, - training_encryption_spec_key_name=training_encryption_spec_key_name, - model_encryption_spec_key_name=model_encryption_spec_key_name, - ) + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) - self._column_transformations = ( - column_transformations_utils.validate_and_get_column_transformations( - column_specs, column_transformations - ) - ) + if self._is_waiting_to_run(): + raise RuntimeError("AutoML Tabular Training is already scheduled to run.") - self._optimization_objective = optimization_objective - self._additional_experiments = [] + if self._has_run: + raise RuntimeError("AutoML Tabular Training has already run.") - def run( + if additional_experiments: + self._add_additional_experiments(additional_experiments) + + return self._run( + dataset=dataset, + target_column=target_column, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + budget_milli_node_hours=budget_milli_node_hours, + model_display_name=model_display_name, + model_labels=model_labels, + disable_early_stopping=disable_early_stopping, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + sync=sync, + create_request_timeout=create_request_timeout, + ) + + @base.optional_sync() + def _run( self, - dataset: datasets.TimeSeriesDataset, + dataset: datasets.TabularDataset, target_column: str, - time_column: str, - time_series_identifier_column: str, - unavailable_at_forecast_columns: List[str], - available_at_forecast_columns: List[str], - forecast_horizon: int, - data_granularity_unit: str, - data_granularity_count: int, training_fraction_split: Optional[float] = None, validation_fraction_split: Optional[float] = None, test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, timestamp_split_column_name: Optional[str] = None, weight_column: Optional[str] = None, - time_series_attribute_columns: Optional[List[str]] = None, - context_window: Optional[int] = None, - export_evaluated_data_items: bool = False, - export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, - export_evaluated_data_items_override_destination: bool = False, - quantiles: Optional[List[float]] = None, - validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, - additional_experiments: Optional[List[str]] = None, - hierarchy_group_columns: Optional[List[str]] = None, - hierarchy_group_total_weight: Optional[float] = None, - hierarchy_temporal_total_weight: Optional[float] = None, - hierarchy_group_temporal_total_weight: Optional[float] = None, - window_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, + disable_early_stopping: 
bool = False, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, sync: bool = True, create_request_timeout: Optional[float] = None, ) -> models.Model: @@ -4071,47 +4499,30 @@ def run( Supported only for tabular Datasets. Args: - dataset (datasets.TimeSeriesDataset): + dataset (datasets.TabularDataset): Required. The dataset within the same Project from which data will be used to train the Model. The Dataset must use schema compatible with Model being trained, and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to + For tabular Datasets, all their data is exported to training, to pick and choose from. target_column (str): - Required. Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column (str): - Required. Name of the column that identifies time order in the time series. - This column must be available at forecast. - time_series_identifier_column (str): - Required. Name of the column that identifies the time series. - unavailable_at_forecast_columns (List[str]): - Required. Column names of columns that are unavailable at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns (List[str]): - Required. Column names of columns that are available at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: (int): - Required. The amount of time into the future for which forecasted values for the target are - returned. Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit (str): - Required. The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. - data_granularity_count (int): - Required. The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. + Required. The name of the column values of which the Model is to predict. + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or - value in the column) must be one of {``TRAIN``, - ``VALIDATE``, ``TEST``}, and it defines to which set the + value in the column) must be one of {``training``, + ``validation``, ``test``}, and it defines to which set the given piece of data is assigned. 
If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. @@ -4133,49 +4544,7 @@ def run( during Model training. The column must have numeric values between 0 and 10000 inclusively, and 0 value means that the row is ignored. If the weight column field is not set, then all rows are assumed to have - equal weight of 1. This column must be available at forecast. - time_series_attribute_columns (List[str]): - Optional. Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window (int): - Optional. The amount of time into the past training and prediction data is used for - model training and prediction respectively. Expressed in number of units defined by the - [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the - default value of 0 which means the model sets each series context window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles (List[float]): - Quantiles to use for the ``minimize-quantile-loss`` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options (str): - Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline - if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline + equal weight of 1. budget_milli_node_hours (int): Optional. The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. @@ -4204,122 +4573,290 @@ def run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. - additional_experiments (List[str]): - Optional. Additional experiment flags for the time series forcasting training. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - hierarchy_group_columns (List[str]): - Optional. A list of time series attribute column names that - define the time series hierarchy. Only one level of hierarchy is - supported, ex. ``region`` for a hierarchy of stores or - ``department`` for a hierarchy of products. If multiple columns - are specified, time series will be grouped by their combined - values, ex. (``blue``, ``large``) for ``color`` and ``size``, up - to 5 columns are accepted. If no group columns are specified, - all time series are considered to be part of the same group. - hierarchy_group_total_weight (float): - Optional. The weight of the loss for predictions aggregated over - time series in the same hierarchy group. - hierarchy_temporal_total_weight (float): - Optional. The weight of the loss for predictions aggregated over - the horizon for a single time series. - hierarchy_group_temporal_total_weight (float): - Optional. The weight of the loss for predictions aggregated over - both the horizon and time series in the same hierarchy group. - window_column (str): - Optional. Name of the column that should be used to filter input - rows. The column should contain either booleans or string - booleans; if the value of the row is True, generate a sliding - window from that row. - window_stride_length (int): - Optional. Step length used to generate input examples. Every - ``window_stride_length`` rows will be used to generate a sliding - window. - window_max_count (int): - Optional. Number of rows that should be used to generate input - examples. If the total row count is larger than this number, the - input data will be randomly sampled to hit the count. - holiday_regions (List[str]): - Optional. The geographical regions to use when creating holiday - features. 
This option is only allowed when data_granularity_unit - is ``day``. Acceptable values can come from any of the following - levels: - Top level: GLOBAL - Second level: continental regions - NA: North America - JAPAC: Japan and Asia Pacific - EMEA: Europe, the Middle East and Africa - LAC: Latin America and the Caribbean - Third level: countries from ISO 3166-1 Country codes. + disable_early_stopping (bool): + Required. If true, the entire budget is used. This disables the early stopping + feature. By default, the early stopping feature is enabled, which means + that training might stop before the entire training budget has been + used, if further training does no longer brings significant improvement + to the model. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. + + Expected format: + ``bq://::
`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. sync (bool): - Optional. Whether to execute this method synchronously. If False, this method + Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. - - Raises: - RuntimeError: If Training job has already been run or is waiting to run. """ - if model_display_name: - utils.validate_display_name(model_display_name) - if model_labels: - utils.validate_labels(model_labels) + training_task_definition = schema.training_job.definition.automl_tabular - if self._is_waiting_to_run(): - raise RuntimeError( - "AutoML Forecasting Training is already scheduled to run." + # auto-populate transformations + if self._column_transformations is None: + _LOGGER.info( + "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." ) - if self._has_run: - raise RuntimeError("AutoML Forecasting Training has already run.") + ( + self._column_transformations, + column_names, + ) = column_transformations_utils.get_default_column_transformations( + dataset=dataset, target_column=target_column + ) - if additional_experiments: - self._add_additional_experiments(additional_experiments) + _LOGGER.info( + "The column transformation of type 'auto' was set for the following columns: %s." 
+ % column_names + ) - return self._run( + training_task_inputs_dict = { + # required inputs + "targetColumn": target_column, + "transformations": self._column_transformations, + "trainBudgetMilliNodeHours": budget_milli_node_hours, + # optional inputs + "weightColumnName": weight_column, + "disableEarlyStopping": disable_early_stopping, + "optimizationObjective": self._optimization_objective, + "predictionType": self._optimization_prediction_type, + "optimizationObjectiveRecallValue": self._optimization_objective_recall_value, + "optimizationObjectivePrecisionValue": self._optimization_objective_precision_value, + } + + final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri + if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( + "bq://" + ): + final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" + + if export_evaluated_data_items: + training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { + "destinationBigqueryUri": final_export_eval_bq_uri, + "overrideExistingTable": export_evaluated_data_items_override_destination, + } + + if self._additional_experiments: + training_task_inputs_dict[ + "additionalExperiments" + ] = self._additional_experiments + + model = gca_model.Model( + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, + encryption_spec=self._model_encryption_spec, + ) + + return self._run_job( + training_task_definition=training_task_definition, + training_task_inputs=training_task_inputs_dict, dataset=dataset, - target_column=target_column, - time_column=time_column, - time_series_identifier_column=time_series_identifier_column, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - available_at_forecast_columns=available_at_forecast_columns, - forecast_horizon=forecast_horizon, - data_granularity_unit=data_granularity_unit, - data_granularity_count=data_granularity_count, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - weight_column=weight_column, - time_series_attribute_columns=time_series_attribute_columns, - context_window=context_window, - budget_milli_node_hours=budget_milli_node_hours, - export_evaluated_data_items=export_evaluated_data_items, - export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, - export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, - quantiles=quantiles, - validation_options=validation_options, - model_display_name=model_display_name, - model_labels=model_labels, - hierarchy_group_columns=hierarchy_group_columns, - hierarchy_group_total_weight=hierarchy_group_total_weight, - hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, - hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, - window_column=window_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - sync=sync, + model=model, create_request_timeout=create_request_timeout, ) - @base.optional_sync() - def _run( + @property + def _model_upload_fail_string(self) -> str: + """Helper property for model upload failure.""" + return ( + f"Training Pipeline {self.resource_name} is not configured to upload a " + "Model." 
+ ) + + def _add_additional_experiments(self, additional_experiments: List[str]): + """Add experiment flags to the training job. + Args: + additional_experiments (List[str]): + Experiment flags that can enable some experimental training features. + """ + self._additional_experiments.extend(additional_experiments) + + @staticmethod + def get_auto_column_specs( + dataset: datasets.TabularDataset, + target_column: str, + ) -> Dict[str, str]: + """Returns a dict with all non-target columns as keys and 'auto' as values. + + Example usage: + + column_specs = training_jobs.AutoMLTabularTrainingJob.get_auto_column_specs( + dataset=my_dataset, + target_column="my_target_column", + ) + + Args: + dataset (datasets.TabularDataset): + Required. Intended dataset. + target_column(str): + Required. Intended target column. + Returns: + Dict[str, str] + Column names as keys and 'auto' as values + """ + column_names = [ + column for column in dataset.column_names if column != target_column + ] + column_specs = {column: "auto" for column in column_names} + return column_specs + + class column_data_types: + AUTO = "auto" + NUMERIC = "numeric" + CATEGORICAL = "categorical" + TIMESTAMP = "timestamp" + TEXT = "text" + REPEATED_NUMERIC = "repeated_numeric" + REPEATED_CATEGORICAL = "repeated_categorical" + REPEATED_TEXT = "repeated_text" + + +class AutoMLForecastingTrainingJob(_ForecastingTrainingJob): + _model_type = "AutoML" + _training_task_definition = ( + schema.training_job.definition.automl_forecasting) + _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) + + def __init__( + self, + display_name: Optional[str] = None, + optimization_objective: Optional[str] = None, + column_specs: Optional[Dict[str, str]] = None, + column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + training_encryption_spec_key_name: Optional[str] = None, + model_encryption_spec_key_name: Optional[str] = None, + ): + """Constructs a AutoML Forecasting Training Job. + + Args: + display_name (str): + Optional. The user-defined name of this TrainingPipeline. + optimization_objective (str): + Optional. Objective function the model is to be optimized towards. + The training process creates a Model that optimizes the value of the objective + function over the validation set. The supported optimization objectives: + "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). + "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). + "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) + and mean-absolute-error (MAE). + "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. + (Set this objective to build quantile forecasts.) + column_specs (Dict[str, str]): + Optional. Alternative to column_transformations where the keys of the dict + are column names and their respective values are one of + AutoMLTabularTrainingJob.column_data_types. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. 
+ If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + column_transformations (List[Dict[str, Dict[str, str]]]): + Optional. Transformations to apply to the input columns (i.e. columns other + than the targetColumn). Each transformation may produce multiple + result values from the column's value, and all are used for training. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + Consider using column_specs as column_transformations will be deprecated eventually. + project (str): + Optional. Project to run training in. Overrides project set in aiplatform.init. + location (str): + Optional. Location to run training in. Overrides location set in aiplatform.init. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to run call training service. Overrides + credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + training_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the training pipeline. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this TrainingPipeline will be secured by this key. + + Note: Model trained by this TrainingPipeline is also secured + by this key if ``model_to_upload`` is not set separately. + + Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, the trained Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + + Raises: + ValueError: If both column_transformations and column_specs were provided. 
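As a reading aid for this refactor, a rough sketch of what a concrete subclass of _ForecastingTrainingJob now declares; the class name and schema URI below are placeholders, not artifacts introduced by this patch:

    # Illustrative only; a real subclass lives in google/cloud/aiplatform/training_jobs.py.
    class MyForecastingTrainingJob(_ForecastingTrainingJob):
        # Model family label; the concrete jobs in this patch set "AutoML" and "Seq2Seq".
        _model_type = "MyModel"
        # Training task definition submitted to the pipeline service, and the
        # schema(s) this wrapper class is allowed to represent.
        _training_task_definition = (
            "gs://google-cloud-aiplatform/schema/trainingjob/definition/"
            "my_forecasting_1.0.0.yaml"
        )
        _supported_training_schemas = (_training_task_definition,)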
+ """ + super().__init__( + display_name=display_name, + optimization_objective=optimization_objective, + column_specs=column_specs, + column_transformations=column_transformations, + project=project, + location=location, + credentials=credentials, + labels=labels, + training_encryption_spec_key_name=training_encryption_spec_key_name, + model_encryption_spec_key_name=model_encryption_spec_key_name, + ) + + def run( self, dataset: datasets.TimeSeriesDataset, target_column: str, @@ -4346,6 +4883,7 @@ def _run( budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, hierarchy_group_columns: Optional[List[str]] = None, hierarchy_group_total_weight: Optional[float] = None, hierarchy_temporal_total_weight: Optional[float] = None, @@ -4416,20 +4954,11 @@ def _run( Required. The number of data granularity units between data points in the training data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other values of [data_granularity_unit], must be 1. - training_fraction_split (float): - Optional. The fraction of the input data that is to be used to train - the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - Optional. The fraction of the input data that is to be used to validate - the Model. This is ignored if Dataset is not provided. - test_fraction_split (float): - Optional. The fraction of the input data that is to be used to evaluate - the Model. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or - value in the column) must be one of {``training``, - ``validation``, ``test``}, and it defines to which set the + value in the column) must be one of {``TRAIN``, + ``VALIDATE``, ``TEST``}, and it defines to which set the given piece of data is assigned. If for a piece of data the key is not present or has an invalid value, that piece is ignored by the pipeline. @@ -4456,9 +4985,10 @@ def _run( Optional. Column names that should be used as attribute columns. Each column is constant within a time series. context_window (int): - Optional. The number of periods offset into the past to restrict past sequence, where each - period is one unit of granularity as defined by [period]. When not provided uses the - default value of 0 which means the model sets each series historical window to be 0 (also + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also known as "cold start"). Inclusive. export_evaluated_data_items (bool): Whether to export the test set predictions to a BigQuery table. @@ -4482,7 +5012,7 @@ def _run( Applies only if [export_evaluated_data_items] is True and [export_evaluated_data_items_bigquery_destination_uri] is specified. quantiles (List[float]): - Quantiles to use for the `minimize-quantile-loss` + Quantiles to use for the ``minimize-quantile-loss`` [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in this case. @@ -4521,6 +5051,10 @@ def _run( are allowed. See https://goo.gl/xmQnxf for more information and examples of labels. 
+ additional_experiments (List[str]): + Optional. Additional experiment flags for the time series forcasting training. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. hierarchy_group_columns (List[str]): Optional. A list of time series attribute column names that define the time series hierarchy. Only one level of hierarchy is @@ -4565,183 +5099,57 @@ def _run( LAC: Latin America and the Caribbean Third level: countries from ISO 3166-1 Country codes. sync (bool): - Whether to execute this method synchronously. If False, this method + Optional. Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will be immediately returned and synced when the Future has completed. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. Returns: model: The trained Vertex AI Model resource or None if training did not produce a Vertex AI Model. - """ - - training_task_definition = schema.training_job.definition.automl_forecasting - - # auto-populate transformations - if self._column_transformations is None: - _LOGGER.info( - "No column transformations provided, so now retrieving columns from dataset in order to set default column transformations." - ) - - ( - self._column_transformations, - column_names, - ) = dataset._get_default_column_transformations(target_column) - - _LOGGER.info( - "The column transformation of type 'auto' was set for the following columns: %s." - % column_names - ) - - window_config = self._create_window_config( - column=window_column, - stride_length=window_stride_length, - max_count=window_max_count, - ) - - training_task_inputs_dict = { - # required inputs - "targetColumn": target_column, - "timeColumn": time_column, - "timeSeriesIdentifierColumn": time_series_identifier_column, - "timeSeriesAttributeColumns": time_series_attribute_columns, - "unavailableAtForecastColumns": unavailable_at_forecast_columns, - "availableAtForecastColumns": available_at_forecast_columns, - "forecastHorizon": forecast_horizon, - "dataGranularity": { - "unit": data_granularity_unit, - "quantity": data_granularity_count, - }, - "transformations": self._column_transformations, - "trainBudgetMilliNodeHours": budget_milli_node_hours, - # optional inputs - "weightColumn": weight_column, - "contextWindow": context_window, - "quantiles": quantiles, - "validationOptions": validation_options, - "optimizationObjective": self._optimization_objective, - "holidayRegions": holiday_regions, - } - - # TODO(TheMichaelHu): Remove the ifs once the API supports these inputs. 
- if any( - [ - hierarchy_group_columns, - hierarchy_group_total_weight, - hierarchy_temporal_total_weight, - hierarchy_group_temporal_total_weight, - ] - ): - training_task_inputs_dict["hierarchyConfig"] = { - "groupColumns": hierarchy_group_columns, - "groupTotalWeight": hierarchy_group_total_weight, - "temporalTotalWeight": hierarchy_temporal_total_weight, - "groupTemporalTotalWeight": hierarchy_group_temporal_total_weight, - } - if window_config: - training_task_inputs_dict["windowConfig"] = window_config - - final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri - if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( - "bq://" - ): - final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" - - if export_evaluated_data_items: - training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { - "destinationBigqueryUri": final_export_eval_bq_uri, - "overrideExistingTable": export_evaluated_data_items_override_destination, - } - - if self._additional_experiments: - training_task_inputs_dict[ - "additionalExperiments" - ] = self._additional_experiments - model = gca_model.Model( - display_name=model_display_name or self._display_name, - labels=model_labels or self._labels, - encryption_spec=self._model_encryption_spec, - ) + Raises: + RuntimeError: If Training job has already been run or is waiting to run. + """ - new_model = self._run_job( - training_task_definition=training_task_definition, - training_task_inputs=training_task_inputs_dict, + return super().run( dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, predefined_split_column_name=predefined_split_column_name, timestamp_split_column_name=timestamp_split_column_name, - model=model, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + additional_experiments=additional_experiments, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + sync=sync, create_request_timeout=create_request_timeout, ) - if export_evaluated_data_items: - _LOGGER.info( - "Exported examples available at:\n%s" - % self.evaluated_data_items_bigquery_uri - ) - - return new_model - - @property - def _model_upload_fail_string(self) -> str: - """Helper property for 
model upload failure.""" - return ( - f"Training Pipeline {self.resource_name} is not configured to upload a " - "Model." - ) - - @property - def evaluated_data_items_bigquery_uri(self) -> Optional[str]: - """BigQuery location of exported evaluated examples from the Training Job - Returns: - str: BigQuery uri for the exported evaluated examples if the export - feature is enabled for training. - None: If the export feature was not enabled for training. - """ - - self._assert_gca_resource_is_available() - - metadata = self._gca_resource.training_task_metadata - if metadata and "evaluatedDataItemsBigqueryUri" in metadata: - return metadata["evaluatedDataItemsBigqueryUri"] - - return None - - def _add_additional_experiments(self, additional_experiments: List[str]): - """Add experiment flags to the training job. - Args: - additional_experiments (List[str]): - Experiment flags that can enable some experimental training features. - """ - self._additional_experiments.extend(additional_experiments) - - @staticmethod - def _create_window_config( - column: Optional[str] = None, - stride_length: Optional[int] = None, - max_count: Optional[int] = None, - ) -> Optional[Dict[str, Union[int, str]]]: - """Creates a window config from training job arguments.""" - configs = { - "column": column, - "strideLength": stride_length, - "maxCount": max_count, - } - present_configs = {k: v for k, v in configs.items() if v is not None} - if not present_configs: - return None - if len(present_configs) > 1: - raise ValueError( - "More than one windowing strategy provided. Make sure only one " - "of window_column, window_stride_length, or window_max_count " - "is specified." - ) - return present_configs - class AutoMLImageTrainingJob(_TrainingJob): _supported_training_schemas = ( diff --git a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py index 21ca78da2e..64e85befa6 100644 --- a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py @@ -24,7 +24,7 @@ from google.cloud.aiplatform import datasets from google.cloud.aiplatform import initializer from google.cloud.aiplatform import schema -from google.cloud.aiplatform.training_jobs import AutoMLForecastingTrainingJob +from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform.compat.services import ( model_service_client, @@ -266,7 +266,7 @@ def mock_dataset_nontimeseries(): @pytest.mark.usefixtures("google_auth_mock") -class TestAutoMLForecastingTrainingJob: +class TestForecastingTrainingJob: def setup_method(self): importlib.reload(initializer) importlib.reload(aiplatform) @@ -275,6 +275,13 @@ def teardown_method(self): initializer.global_pool.shutdown(wait=True) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_service_create( self, mock_pipeline_service_create, @@ -282,10 +289,11 @@ def test_run_call_pipeline_service_create( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -344,7 +352,7 @@ 
def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -371,6 +379,13 @@ def test_run_call_pipeline_service_create( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_service_create_with_timeout( self, mock_pipeline_service_create, @@ -378,10 +393,11 @@ def test_run_call_pipeline_service_create_with_timeout( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -440,7 +456,7 @@ def test_run_call_pipeline_service_create_with_timeout( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -454,16 +470,24 @@ def test_run_call_pipeline_service_create_with_timeout( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -517,7 +541,7 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -531,16 +555,24 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def 
test_run_call_pipeline_if_set_additional_experiments( self, mock_pipeline_service_create, mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -591,7 +623,7 @@ def test_run_call_pipeline_if_set_additional_experiments( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -609,14 +641,22 @@ def test_run_call_pipeline_if_set_additional_experiments( "mock_model_service_get", ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_called_twice_raises( self, mock_dataset_time_series, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -686,16 +726,24 @@ def test_run_called_twice_raises( ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_run_raises_if_pipeline_fails( self, mock_pipeline_service_create_and_get_with_fail, mock_dataset_time_series, sync, + training_job, ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -739,10 +787,21 @@ def test_run_raises_if_pipeline_fails( with pytest.raises(RuntimeError): job.get_model() - def test_raises_before_run_is_called(self, mock_pipeline_service_create): + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) + def test_raises_before_run_is_called( + self, + mock_pipeline_service_create, + training_job, + ): aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -758,6 +817,13 @@ def test_raises_before_run_is_called(self, mock_pipeline_service_create): job.state @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_fraction( self, mock_pipeline_service_create, @@ -765,10 +831,11 @@ def test_splits_fraction( 
mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name. - Create and run an AutoML Video Classification training job, verify calls and return value + Create and run an Forecasting training job, verify calls and return value """ aiplatform.init( @@ -776,7 +843,7 @@ def test_splits_fraction( encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -838,7 +905,7 @@ def test_splits_fraction( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -852,6 +919,13 @@ def test_splits_fraction( ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_timestamp( self, mock_pipeline_service_create, @@ -859,10 +933,11 @@ def test_splits_timestamp( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """Initiate aiplatform with encryption key name. - Create and run an AutoML Forecasting training job, verify calls and + Create and run an Forecasting training job, verify calls and return value """ @@ -871,7 +946,7 @@ def test_splits_timestamp( encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -934,9 +1009,7 @@ def test_splits_timestamp( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=( - schema.training_job.definition.automl_forecasting - ), + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -950,6 +1023,13 @@ def test_splits_timestamp( ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_predefined( self, mock_pipeline_service_create, @@ -957,10 +1037,11 @@ def test_splits_predefined( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name. 
- Create and run an AutoML Video Classification training job, verify calls and return value + Create and run an Forecasting training job, verify calls and return value """ aiplatform.init( @@ -968,7 +1049,7 @@ def test_splits_predefined( encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -1026,7 +1107,7 @@ def test_splits_predefined( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, @@ -1040,6 +1121,13 @@ def test_splits_predefined( ) @pytest.mark.parametrize("sync", [True, False]) + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + ], + ) def test_splits_default( self, mock_pipeline_service_create, @@ -1047,10 +1135,11 @@ def test_splits_default( mock_dataset_time_series, mock_model_service_get, sync, + training_job, ): """ Initiate aiplatform with encryption key name. - Create and run an AutoML Video Classification training job, verify calls and return value + Create and run an Forecasting training job, verify calls and return value """ aiplatform.init( @@ -1058,7 +1147,7 @@ def test_splits_default( encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, ) - job = AutoMLForecastingTrainingJob( + job = training_job( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -1110,7 +1199,7 @@ def test_splits_default( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, - training_task_definition=schema.training_job.definition.automl_forecasting, + training_task_definition=training_job._training_task_definition, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, input_data_config=true_input_data_config, From 26c70a21d5dade49d4faf2f70dee1c2c10b7e6b0 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 9 May 2022 10:16:57 -0400 Subject: [PATCH 02/14] Add a seq2seq training job. 
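For reviewers, an illustrative sketch of how the new job is addressed, mirroring the unit tests; the display name, objective and labels are placeholder values:

    from google.cloud.aiplatform import training_jobs

    job = training_jobs.SequenceToSequencePlusForecastingTrainingJob(
        display_name="seq2seq-forecasting",
        optimization_objective="minimize-rmse",
        labels={"team": "forecasting"},
    )
    # job.run(...) then takes the core time series arguments defined below
    # (dataset, target/time columns, forecast horizon, data granularity).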
--- google/cloud/aiplatform/training_jobs.py | 363 +++++++++++++++++++++++ 1 file changed, 363 insertions(+) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 3f308b510a..3d8ebdcea1 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -5151,6 +5151,369 @@ def run( ) +class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): + _model_type = "Seq2Seq" + _training_task_definition = ( + schema.training_job.definition.seq2seq_forecasting) + _supported_training_schemas = (schema.training_job.definition.seq2seq_forecasting,) + + def __init__( + self, + display_name: Optional[str] = None, + optimization_objective: Optional[str] = None, + column_specs: Optional[Dict[str, str]] = None, + column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, + training_encryption_spec_key_name: Optional[str] = None, + model_encryption_spec_key_name: Optional[str] = None, + ): + """Constructs a Seq2Seq Forecasting Training Job. + + Args: + display_name (str): + Optional. The user-defined name of this TrainingPipeline. + optimization_objective (str): + Optional. Objective function the model is to be optimized towards. + The training process creates a Model that optimizes the value of the objective + function over the validation set. The supported optimization objectives: + "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). + "minimize-mae" - Minimize mean-absolute error (MAE). + "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). + "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). + "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) + and mean-absolute-error (MAE). + "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. + (Set this objective to build quantile forecasts.) + column_specs (Dict[str, str]): + Optional. Alternative to column_transformations where the keys of the dict + are column names and their respective values are one of + AutoMLTabularTrainingJob.column_data_types. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + column_transformations (List[Dict[str, Dict[str, str]]]): + Optional. Transformations to apply to the input columns (i.e. columns other + than the targetColumn). Each transformation may produce multiple + result values from the column's value, and all are used for training. + When creating transformation for BigQuery Struct column, the column + should be flattened using "." as the delimiter. Only columns with no child + should have a transformation. + If an input column has no transformations on it, such a column is + ignored by the training, except for the targetColumn, which should have + no transformations defined on. + Only one of column_transformations or column_specs should be passed. + Consider using column_specs as column_transformations will be deprecated eventually. 
+ project (str): + Optional. Project to run training in. Overrides project set in aiplatform.init. + location (str): + Optional. Location to run training in. Overrides location set in aiplatform.init. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to run call training service. Overrides + credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + training_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the training pipeline. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, this TrainingPipeline will be secured by this key. + + Note: Model trained by this TrainingPipeline is also secured + by this key if ``model_to_upload`` is not set separately. + + Overrides encryption_spec_key_name set in aiplatform.init. + model_encryption_spec_key_name (Optional[str]): + Optional. The Cloud KMS resource identifier of the customer + managed encryption key used to protect the model. Has the + form: + ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. + The key needs to be in the same region as where the compute + resource is created. + + If set, the trained Model will be secured by this key. + + Overrides encryption_spec_key_name set in aiplatform.init. + + Raises: + ValueError: If both column_transformations and column_specs were provided. 
+ """ + super().__init__( + display_name=display_name, + optimization_objective=optimization_objective, + column_specs=column_specs, + column_transformations=column_transformations, + project=project, + location=location, + credentials=credentials, + labels=labels, + training_encryption_spec_key_name=training_encryption_spec_key_name, + model_encryption_spec_key_name=model_encryption_spec_key_name, + ) + + def run( + self, + dataset: datasets.TimeSeriesDataset, + target_column: str, + time_column: str, + time_series_identifier_column: str, + unavailable_at_forecast_columns: List[str], + available_at_forecast_columns: List[str], + forecast_horizon: int, + data_granularity_unit: str, + data_granularity_count: int, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + weight_column: Optional[str] = None, + time_series_attribute_columns: Optional[List[str]] = None, + context_window: Optional[int] = None, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, + quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + sync: bool = True, + create_request_timeout: Optional[float] = None, + ) -> models.Model: + """Runs the training job and returns a model. + + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. + + Args: + dataset (datasets.TimeSeriesDataset): + Required. The dataset within the same Project from which data will be used to train the Model. The + Dataset must use schema compatible with Model being trained, + and what is compatible should be described in the used + TrainingPipeline's [training_task_definition] + [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. + For time series Datasets, all their data is exported to + training, to pick and choose from. + target_column (str): + Required. Name of the column that the Model is to predict values for. This + column must be unavailable at forecast. + time_column (str): + Required. Name of the column that identifies time order in the time series. + This column must be available at forecast. 
+ time_series_identifier_column (str): + Required. Name of the column that identifies the time series. + unavailable_at_forecast_columns (List[str]): + Required. Column names of columns that are unavailable at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is unknown before the forecast + (e.g. population of a city in a given year, or weather on a given day). + available_at_forecast_columns (List[str]): + Required. Column names of columns that are available at forecast. + Each column contains information for the given entity (identified by the + [time_series_identifier_column]) that is known at forecast. + forecast_horizon: (int): + Required. The amount of time into the future for which forecasted values for the target are + returned. Expressed in number of units defined by the [data_granularity_unit] and + [data_granularity_count] field. Inclusive. + data_granularity_unit (str): + Required. The data granularity unit. Accepted values are ``minute``, + ``hour``, ``day``, ``week``, ``month``, ``year``. + data_granularity_count (int): + Required. The number of data granularity units between data points in the training + data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other + values of [data_granularity_unit], must be 1. + predefined_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key (either the label's value or + value in the column) must be one of {``TRAIN``, + ``VALIDATE``, ``TEST``}, and it defines to which set the + given piece of data is assigned. If for a piece of data the + key is not present or has an invalid value, that piece is + ignored by the pipeline. + + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, + validation_fraction_split, and test_fraction_split. + weight_column (str): + Optional. Name of the column that should be used as the weight column. + Higher values in this column give more importance to the row + during Model training. The column must have numeric values between 0 and + 10000 inclusively, and 0 value means that the row is ignored. + If the weight column field is not set, then all rows are assumed to have + equal weight of 1. This column must be available at forecast. + time_series_attribute_columns (List[str]): + Optional. Column names that should be used as attribute columns. + Each column is constant within a time series. + context_window (int): + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also + known as "cold start"). Inclusive. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. 
+ export_evaluated_data_items_bigquery_destination_uri (string):
+ Optional. URI of desired destination BigQuery table for exported test set predictions.
+
+ Expected format:
+ ``bq://<project_id>:<dataset_id>:<table>
`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. + quantiles (List[float]): + Quantiles to use for the `minimize-quantile-loss` + [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in + this case. + + Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. + Each quantile must be unique. + validation_options (str): + Validation options for the data validation component. The available options are: + "fail-pipeline" - (default), will validate against the validation and fail the pipeline + if it fails. + "ignore-validation" - ignore the results of the validation and continue the pipeline + budget_milli_node_hours (int): + Optional. The train budget of creating this Model, expressed in milli node + hours i.e. 1,000 value in this field means 1 node hour. + The training cost of the model will not exceed this budget. The final + cost will be attempted to be close to the budget, though may end up + being (even) noticeably smaller - at the backend's discretion. This + especially may happen when further model training ceases to provide + any improvements. + If the budget is set to a value known to be insufficient to train a + Model for the given training set, the training won't be attempted and + will error. + The minimum value is 1000 and the maximum is 72000. + model_display_name (str): + Optional. If the script produces a managed Vertex AI Model. The display name of + the Model. The name can be up to 128 characters long and can be consist + of any UTF-8 characters. + + If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. + additional_experiments (List[str]): + Optional. Additional experiment flags for the time series forcasting training. + create_request_timeout (float): + Optional. The timeout for the create request in seconds. + sync (bool): + Whether to execute this method synchronously. If False, this method + will be executed in concurrent Future and any downstream object will + be immediately returned and synced when the Future has completed. + Returns: + model: The trained Vertex AI Model resource or None if training did not + produce a Vertex AI Model. + + Raises: + RuntimeError: If Training job has already been run or is waiting to run. 
+ """ + + return super().run( + dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + additional_experiments=additional_experiments, + sync=sync, + create_request_timeout=create_request_timeout, + ) + + @property + def evaluated_data_items_bigquery_uri(self) -> Optional[str]: + """BigQuery location of exported evaluated examples from the Training Job + Returns: + str: BigQuery uri for the exported evaluated examples if the export + feature is enabled for training. + None: If the export feature was not enabled for training. + """ + return super().evaluated_data_items_bigquery_uri + + class AutoMLImageTrainingJob(_TrainingJob): _supported_training_schemas = ( schema.training_job.definition.automl_image_classification, From 7505165a54bc45b93054978796d733fc57d0b0b8 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 9 May 2022 10:37:51 -0400 Subject: [PATCH 03/14] fix context window description in _run --- google/cloud/aiplatform/training_jobs.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 3d8ebdcea1..cff41fd5e1 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -2137,9 +2137,10 @@ def _run( Optional. Column names that should be used as attribute columns. Each column is constant within a time series. context_window (int): - Optional. The number of periods offset into the past to restrict past sequence, where each - period is one unit of granularity as defined by [period]. When not provided uses the - default value of 0 which means the model sets each series historical window to be 0 (also + Optional. The amount of time into the past training and prediction data is used for + model training and prediction respectively. Expressed in number of units defined by the + [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the + default value of 0 which means the model sets each series context window to be 0 (also known as "cold start"). Inclusive. export_evaluated_data_items (bool): Whether to export the test set predictions to a BigQuery table. 
From a1ba2c0c89e6ca4607d47853a9cba8662c19db65 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 9 May 2022 12:31:23 -0400 Subject: [PATCH 04/14] blacken --- google/cloud/aiplatform/training_jobs.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index cff41fd5e1..ab0a9bc54f 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1962,8 +1962,7 @@ def run( if self._has_run: raise RuntimeError( - f"{self.__class__._model_type} Forecasting Training has " - "already run." + f"{self.__class__._model_type} Forecasting Training has already run." ) if additional_experiments: @@ -4741,8 +4740,7 @@ class column_data_types: class AutoMLForecastingTrainingJob(_ForecastingTrainingJob): _model_type = "AutoML" - _training_task_definition = ( - schema.training_job.definition.automl_forecasting) + _training_task_definition = schema.training_job.definition.automl_forecasting _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) def __init__( @@ -5154,8 +5152,7 @@ def run( class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): _model_type = "Seq2Seq" - _training_task_definition = ( - schema.training_job.definition.seq2seq_forecasting) + _training_task_definition = schema.training_job.definition.seq2seq_forecasting _supported_training_schemas = (schema.training_job.definition.seq2seq_forecasting,) def __init__( From f6d3add7e399588ef1e89cefed7579800858b76d Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 9 May 2022 15:25:32 -0400 Subject: [PATCH 05/14] add supported schemas to forecasting base class --- google/cloud/aiplatform/training_jobs.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index ab0a9bc54f..b4303a2a94 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1564,6 +1564,8 @@ def _model_upload_fail_string(self) -> str: class _ForecastingTrainingJob(_TrainingJob): """ABC for Forecasting Training Pipelines.""" + _supported_training_schemas = tuple() + def __init__( self, display_name: Optional[str] = None, From a4e2a333148be14dfa883e116bc58d3d20b856f3 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Tue, 17 May 2022 09:48:54 -0400 Subject: [PATCH 06/14] Add seq2seq job to init file. 
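
With this change the job class is exported from the package root alongside the
other training job classes, and the schema constant it uses is renamed to
seq2seq_plus_forecasting. A small sketch of the resulting import path, assuming
a placeholder project:

    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")

    job = aiplatform.SequenceToSequencePlusForecastingTrainingJob(
        display_name="seq2seq-forecasting-job",
    )
    # The job trains against the seq2seq+ task definition:
    # gs://google-cloud-aiplatform/schema/trainingjob/definition/seq2seq_plus_time_series_forecasting_1.0.0.yaml
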
--- google/cloud/aiplatform/__init__.py | 2 ++ google/cloud/aiplatform/schema.py | 2 +- google/cloud/aiplatform/training_jobs.py | 16 +++++++++------- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/google/cloud/aiplatform/__init__.py b/google/cloud/aiplatform/__init__.py index db7d0a7c18..1ad69b2a54 100644 --- a/google/cloud/aiplatform/__init__.py +++ b/google/cloud/aiplatform/__init__.py @@ -63,6 +63,7 @@ CustomPythonPackageTrainingJob, AutoMLTabularTrainingJob, AutoMLForecastingTrainingJob, + SequenceToSequencePlusForecastingTrainingJob, AutoMLImageTrainingJob, AutoMLTextTrainingJob, AutoMLVideoTrainingJob, @@ -116,6 +117,7 @@ "Model", "ModelEvaluation", "PipelineJob", + "SequenceToSequencePlusForecastingTrainingJob", "TabularDataset", "Tensorboard", "TensorboardExperiment", diff --git a/google/cloud/aiplatform/schema.py b/google/cloud/aiplatform/schema.py index 8c8e7f32f3..96a7a50bbd 100644 --- a/google/cloud/aiplatform/schema.py +++ b/google/cloud/aiplatform/schema.py @@ -23,7 +23,7 @@ class definition: custom_task = "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml" automl_tabular = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_tabular_1.0.0.yaml" automl_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_time_series_forecasting_1.0.0.yaml" - seq2seq_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/seq2seq_plus_time_series_forecasting_1.0.0.yaml" + seq2seq_plus_forecasting = "gs://google-cloud-aiplatform/schema/trainingjob/definition/seq2seq_plus_time_series_forecasting_1.0.0.yaml" automl_image_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_classification_1.0.0.yaml" automl_image_object_detection = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_image_object_detection_1.0.0.yaml" automl_text_classification = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_classification_1.0.0.yaml" diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index b4303a2a94..a506e74a51 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1693,7 +1693,7 @@ def _training_task_definition(cls) -> str: """A GCS path to the YAML file that defines the training task. The definition files that can be used here are found in - gs://google-cloud- aiplatform/schema/trainingjob/definition/. + gs://google-cloud-aiplatform/schema/trainingjob/definition/. """ pass @@ -1958,13 +1958,13 @@ def run( if self._is_waiting_to_run(): raise RuntimeError( - f"{self.__class__._model_type} Forecasting Training is already " - "scheduled to run." + f"{self._model_type} Forecasting Training is already scheduled " + "to run." ) if self._has_run: raise RuntimeError( - f"{self.__class__._model_type} Forecasting Training has already run." + f"{self._model_type} Forecasting Training has already run." 
) if additional_experiments: @@ -2345,7 +2345,7 @@ def _run( ) new_model = self._run_job( - training_task_definition=self.__class__._training_task_definition, + training_task_definition=self._training_task_definition, training_task_inputs=training_task_inputs_dict, dataset=dataset, training_fraction_split=training_fraction_split, @@ -5154,8 +5154,10 @@ def run( class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): _model_type = "Seq2Seq" - _training_task_definition = schema.training_job.definition.seq2seq_forecasting - _supported_training_schemas = (schema.training_job.definition.seq2seq_forecasting,) + _training_task_definition = schema.training_job.definition.seq2seq_plus_forecasting + _supported_training_schemas = ( + schema.training_job.definition.seq2seq_plus_forecasting, + ) def __init__( self, From 4e2cfafa18698dda4662dd020fa7ab9609afcccb Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Mon, 23 May 2022 16:13:43 -0400 Subject: [PATCH 07/14] only keep super methods that need documentation --- google/cloud/aiplatform/training_jobs.py | 744 ++--------------------- 1 file changed, 61 insertions(+), 683 deletions(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index a506e74a51..769fffbd25 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -4745,118 +4745,6 @@ class AutoMLForecastingTrainingJob(_ForecastingTrainingJob): _training_task_definition = schema.training_job.definition.automl_forecasting _supported_training_schemas = (schema.training_job.definition.automl_forecasting,) - def __init__( - self, - display_name: Optional[str] = None, - optimization_objective: Optional[str] = None, - column_specs: Optional[Dict[str, str]] = None, - column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, - project: Optional[str] = None, - location: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, - labels: Optional[Dict[str, str]] = None, - training_encryption_spec_key_name: Optional[str] = None, - model_encryption_spec_key_name: Optional[str] = None, - ): - """Constructs a AutoML Forecasting Training Job. - - Args: - display_name (str): - Optional. The user-defined name of this TrainingPipeline. - optimization_objective (str): - Optional. Objective function the model is to be optimized towards. - The training process creates a Model that optimizes the value of the objective - function over the validation set. The supported optimization objectives: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). - "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) - and mean-absolute-error (MAE). - "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. - (Set this objective to build quantile forecasts.) - column_specs (Dict[str, str]): - Optional. Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. 
- If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - column_transformations (List[Dict[str, Dict[str, str]]]): - Optional. Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated eventually. - project (str): - Optional. Project to run training in. Overrides project set in aiplatform.init. - location (str): - Optional. Location to run training in. Overrides location set in aiplatform.init. - credentials (auth_credentials.Credentials): - Optional. Custom credentials to use to run call training service. Overrides - credentials set in aiplatform.init. - labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. - - If set, this TrainingPipeline will be secured by this key. - - Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. - - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. - - If set, the trained Model will be secured by this key. - - Overrides encryption_spec_key_name set in aiplatform.init. - - Raises: - ValueError: If both column_transformations and column_specs were provided. 
- """ - super().__init__( - display_name=display_name, - optimization_objective=optimization_objective, - column_specs=column_specs, - column_transformations=column_transformations, - project=project, - location=location, - credentials=credentials, - labels=labels, - training_encryption_spec_key_name=training_encryption_spec_key_name, - model_encryption_spec_key_name=model_encryption_spec_key_name, - ) - def run( self, dataset: datasets.TimeSeriesDataset, @@ -4879,397 +4767,69 @@ def run( export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, export_evaluated_data_items_override_destination: bool = False, - quantiles: Optional[List[float]] = None, - validation_options: Optional[str] = None, - budget_milli_node_hours: int = 1000, - model_display_name: Optional[str] = None, - model_labels: Optional[Dict[str, str]] = None, - additional_experiments: Optional[List[str]] = None, - hierarchy_group_columns: Optional[List[str]] = None, - hierarchy_group_total_weight: Optional[float] = None, - hierarchy_temporal_total_weight: Optional[float] = None, - hierarchy_group_temporal_total_weight: Optional[float] = None, - window_column: Optional[str] = None, - window_stride_length: Optional[int] = None, - window_max_count: Optional[int] = None, - holiday_regions: Optional[List[str]] = None, - sync: bool = True, - create_request_timeout: Optional[float] = None, - ) -> models.Model: - """Runs the training job and returns a model. - - If training on a Vertex AI dataset, you can use one of the following split configurations: - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. - Supported only for tabular Datasets. - - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - Supported only for tabular Datasets. - - Args: - dataset (datasets.TimeSeriesDataset): - Required. The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to - training, to pick and choose from. - target_column (str): - Required. Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column (str): - Required. Name of the column that identifies time order in the time series. - This column must be available at forecast. - time_series_identifier_column (str): - Required. Name of the column that identifies the time series. - unavailable_at_forecast_columns (List[str]): - Required. Column names of columns that are unavailable at forecast. 
- Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns (List[str]): - Required. Column names of columns that are available at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: (int): - Required. The amount of time into the future for which forecasted values for the target are - returned. Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit (str): - Required. The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. - data_granularity_count (int): - Required. The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. - predefined_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {``TRAIN``, - ``VALIDATE``, ``TEST``}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timestamp_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. This column must be available at forecast. - time_series_attribute_columns (List[str]): - Optional. Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window (int): - Optional. The amount of time into the past training and prediction data is used for - model training and prediction respectively. Expressed in number of units defined by the - [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the - default value of 0 which means the model sets each series context window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles (List[float]): - Quantiles to use for the ``minimize-quantile-loss`` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options (str): - Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline - if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline - budget_milli_node_hours (int): - Optional. The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name (str): - Optional. If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - - If not provided upon creation, the job's display_name is used. - model_labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - additional_experiments (List[str]): - Optional. Additional experiment flags for the time series forcasting training. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - hierarchy_group_columns (List[str]): - Optional. A list of time series attribute column names that - define the time series hierarchy. Only one level of hierarchy is - supported, ex. ``region`` for a hierarchy of stores or - ``department`` for a hierarchy of products. If multiple columns - are specified, time series will be grouped by their combined - values, ex. (``blue``, ``large``) for ``color`` and ``size``, up - to 5 columns are accepted. If no group columns are specified, - all time series are considered to be part of the same group. - hierarchy_group_total_weight (float): - Optional. The weight of the loss for predictions aggregated over - time series in the same hierarchy group. - hierarchy_temporal_total_weight (float): - Optional. 
The weight of the loss for predictions aggregated over - the horizon for a single time series. - hierarchy_group_temporal_total_weight (float): - Optional. The weight of the loss for predictions aggregated over - both the horizon and time series in the same hierarchy group. - window_column (str): - Optional. Name of the column that should be used to filter input - rows. The column should contain either booleans or string - booleans; if the value of the row is True, generate a sliding - window from that row. - window_stride_length (int): - Optional. Step length used to generate input examples. Every - ``window_stride_length`` rows will be used to generate a sliding - window. - window_max_count (int): - Optional. Number of rows that should be used to generate input - examples. If the total row count is larger than this number, the - input data will be randomly sampled to hit the count. - holiday_regions (List[str]): - Optional. The geographical regions to use when creating holiday - features. This option is only allowed when data_granularity_unit - is ``day``. Acceptable values can come from any of the following - levels: - Top level: GLOBAL - Second level: continental regions - NA: North America - JAPAC: Japan and Asia Pacific - EMEA: Europe, the Middle East and Africa - LAC: Latin America and the Caribbean - Third level: countries from ISO 3166-1 Country codes. - sync (bool): - Optional. Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. - - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - - Raises: - RuntimeError: If Training job has already been run or is waiting to run. 
- """ - - return super().run( - dataset=dataset, - target_column=target_column, - time_column=time_column, - time_series_identifier_column=time_series_identifier_column, - unavailable_at_forecast_columns=unavailable_at_forecast_columns, - available_at_forecast_columns=available_at_forecast_columns, - forecast_horizon=forecast_horizon, - data_granularity_unit=data_granularity_unit, - data_granularity_count=data_granularity_count, - training_fraction_split=training_fraction_split, - validation_fraction_split=validation_fraction_split, - test_fraction_split=test_fraction_split, - predefined_split_column_name=predefined_split_column_name, - timestamp_split_column_name=timestamp_split_column_name, - weight_column=weight_column, - time_series_attribute_columns=time_series_attribute_columns, - context_window=context_window, - budget_milli_node_hours=budget_milli_node_hours, - export_evaluated_data_items=export_evaluated_data_items, - export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, - export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, - quantiles=quantiles, - validation_options=validation_options, - model_display_name=model_display_name, - model_labels=model_labels, - additional_experiments=additional_experiments, - hierarchy_group_columns=hierarchy_group_columns, - hierarchy_group_total_weight=hierarchy_group_total_weight, - hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, - hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, - window_column=window_column, - window_stride_length=window_stride_length, - window_max_count=window_max_count, - holiday_regions=holiday_regions, - sync=sync, - create_request_timeout=create_request_timeout, - ) - - -class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): - _model_type = "Seq2Seq" - _training_task_definition = schema.training_job.definition.seq2seq_plus_forecasting - _supported_training_schemas = ( - schema.training_job.definition.seq2seq_plus_forecasting, - ) - - def __init__( - self, - display_name: Optional[str] = None, - optimization_objective: Optional[str] = None, - column_specs: Optional[Dict[str, str]] = None, - column_transformations: Optional[List[Dict[str, Dict[str, str]]]] = None, - project: Optional[str] = None, - location: Optional[str] = None, - credentials: Optional[auth_credentials.Credentials] = None, - labels: Optional[Dict[str, str]] = None, - training_encryption_spec_key_name: Optional[str] = None, - model_encryption_spec_key_name: Optional[str] = None, - ): - """Constructs a Seq2Seq Forecasting Training Job. - - Args: - display_name (str): - Optional. The user-defined name of this TrainingPipeline. - optimization_objective (str): - Optional. Objective function the model is to be optimized towards. - The training process creates a Model that optimizes the value of the objective - function over the validation set. The supported optimization objectives: - "minimize-rmse" (default) - Minimize root-mean-squared error (RMSE). - "minimize-mae" - Minimize mean-absolute error (MAE). - "minimize-rmsle" - Minimize root-mean-squared log error (RMSLE). - "minimize-rmspe" - Minimize root-mean-squared percentage error (RMSPE). - "minimize-wape-mae" - Minimize the combination of weighted absolute percentage error (WAPE) - and mean-absolute-error (MAE). - "minimize-quantile-loss" - Minimize the quantile loss at the defined quantiles. - (Set this objective to build quantile forecasts.) 
- column_specs (Dict[str, str]): - Optional. Alternative to column_transformations where the keys of the dict - are column names and their respective values are one of - AutoMLTabularTrainingJob.column_data_types. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - column_transformations (List[Dict[str, Dict[str, str]]]): - Optional. Transformations to apply to the input columns (i.e. columns other - than the targetColumn). Each transformation may produce multiple - result values from the column's value, and all are used for training. - When creating transformation for BigQuery Struct column, the column - should be flattened using "." as the delimiter. Only columns with no child - should have a transformation. - If an input column has no transformations on it, such a column is - ignored by the training, except for the targetColumn, which should have - no transformations defined on. - Only one of column_transformations or column_specs should be passed. - Consider using column_specs as column_transformations will be deprecated eventually. - project (str): - Optional. Project to run training in. Overrides project set in aiplatform.init. - location (str): - Optional. Location to run training in. Overrides location set in aiplatform.init. - credentials (auth_credentials.Credentials): - Optional. Custom credentials to use to run call training service. Overrides - credentials set in aiplatform.init. - labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize TrainingPipelines. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - training_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the training pipeline. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. - - If set, this TrainingPipeline will be secured by this key. - - Note: Model trained by this TrainingPipeline is also secured - by this key if ``model_to_upload`` is not set separately. - - Overrides encryption_spec_key_name set in aiplatform.init. - model_encryption_spec_key_name (Optional[str]): - Optional. The Cloud KMS resource identifier of the customer - managed encryption key used to protect the model. Has the - form: - ``projects/my-project/locations/my-region/keyRings/my-kr/cryptoKeys/my-key``. - The key needs to be in the same region as where the compute - resource is created. - - If set, the trained Model will be secured by this key. 
+ quantiles: Optional[List[float]] = None, + validation_options: Optional[str] = None, + budget_milli_node_hours: int = 1000, + model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, + additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, + sync: bool = True, + create_request_timeout: Optional[float] = None, + ) -> models.Model: + return super().run( + dataset=dataset, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + unavailable_at_forecast_columns=unavailable_at_forecast_columns, + available_at_forecast_columns=available_at_forecast_columns, + forecast_horizon=forecast_horizon, + data_granularity_unit=data_granularity_unit, + data_granularity_count=data_granularity_count, + training_fraction_split=training_fraction_split, + validation_fraction_split=validation_fraction_split, + test_fraction_split=test_fraction_split, + predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, + weight_column=weight_column, + time_series_attribute_columns=time_series_attribute_columns, + context_window=context_window, + budget_milli_node_hours=budget_milli_node_hours, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, + quantiles=quantiles, + validation_options=validation_options, + model_display_name=model_display_name, + model_labels=model_labels, + additional_experiments=additional_experiments, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, + sync=sync, + create_request_timeout=create_request_timeout, + ) - Overrides encryption_spec_key_name set in aiplatform.init. - Raises: - ValueError: If both column_transformations and column_specs were provided. 
- """ - super().__init__( - display_name=display_name, - optimization_objective=optimization_objective, - column_specs=column_specs, - column_transformations=column_transformations, - project=project, - location=location, - credentials=credentials, - labels=labels, - training_encryption_spec_key_name=training_encryption_spec_key_name, - model_encryption_spec_key_name=model_encryption_spec_key_name, - ) +class SequenceToSequencePlusForecastingTrainingJob(_ForecastingTrainingJob): + _model_type = "Seq2Seq" + _training_task_definition = schema.training_job.definition.seq2seq_plus_forecasting + _supported_training_schemas = ( + schema.training_job.definition.seq2seq_plus_forecasting, + ) def run( self, @@ -5302,178 +4862,6 @@ def run( sync: bool = True, create_request_timeout: Optional[float] = None, ) -> models.Model: - """Runs the training job and returns a model. - - If training on a Vertex AI dataset, you can use one of the following split configurations: - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. - - Predefined splits: - Assigns input data to training, validation, and test sets based on the value of a provided key. - If using predefined splits, ``predefined_split_column_name`` must be provided. - Supported only for tabular Datasets. - - Timestamp splits: - Assigns input data to training, validation, and test sets - based on a provided timestamps. The youngest data pieces are - assigned to training set, next to validation set, and the oldest - to the test set. - Supported only for tabular Datasets. - - Args: - dataset (datasets.TimeSeriesDataset): - Required. The dataset within the same Project from which data will be used to train the Model. The - Dataset must use schema compatible with Model being trained, - and what is compatible should be described in the used - TrainingPipeline's [training_task_definition] - [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - For time series Datasets, all their data is exported to - training, to pick and choose from. - target_column (str): - Required. Name of the column that the Model is to predict values for. This - column must be unavailable at forecast. - time_column (str): - Required. Name of the column that identifies time order in the time series. - This column must be available at forecast. - time_series_identifier_column (str): - Required. Name of the column that identifies the time series. - unavailable_at_forecast_columns (List[str]): - Required. Column names of columns that are unavailable at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is unknown before the forecast - (e.g. population of a city in a given year, or weather on a given day). - available_at_forecast_columns (List[str]): - Required. Column names of columns that are available at forecast. - Each column contains information for the given entity (identified by the - [time_series_identifier_column]) that is known at forecast. - forecast_horizon: (int): - Required. The amount of time into the future for which forecasted values for the target are - returned. 
Expressed in number of units defined by the [data_granularity_unit] and - [data_granularity_count] field. Inclusive. - data_granularity_unit (str): - Required. The data granularity unit. Accepted values are ``minute``, - ``hour``, ``day``, ``week``, ``month``, ``year``. - data_granularity_count (int): - Required. The number of data granularity units between data points in the training - data. If [data_granularity_unit] is `minute`, can be 1, 5, 10, 15, or 30. For all other - values of [data_granularity_unit], must be 1. - predefined_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key (either the label's value or - value in the column) must be one of {``TRAIN``, - ``VALIDATE``, ``TEST``}, and it defines to which set the - given piece of data is assigned. If for a piece of data the - key is not present or has an invalid value, that piece is - ignored by the pipeline. - - Supported only for tabular and time series Datasets. - timestamp_split_column_name (str): - Optional. The key is a name of one of the Dataset's data - columns. The value of the key values of the key (the values in - the column) must be in RFC 3339 `date-time` format, where - `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a - piece of data the key is not present or has an invalid value, - that piece is ignored by the pipeline. - Supported only for tabular and time series Datasets. - This parameter must be used with training_fraction_split, - validation_fraction_split, and test_fraction_split. - weight_column (str): - Optional. Name of the column that should be used as the weight column. - Higher values in this column give more importance to the row - during Model training. The column must have numeric values between 0 and - 10000 inclusively, and 0 value means that the row is ignored. - If the weight column field is not set, then all rows are assumed to have - equal weight of 1. This column must be available at forecast. - time_series_attribute_columns (List[str]): - Optional. Column names that should be used as attribute columns. - Each column is constant within a time series. - context_window (int): - Optional. The amount of time into the past training and prediction data is used for - model training and prediction respectively. Expressed in number of units defined by the - [data_granularity_unit] and [data_granularity_count] fields. When not provided uses the - default value of 0 which means the model sets each series context window to be 0 (also - known as "cold start"). Inclusive. - export_evaluated_data_items (bool): - Whether to export the test set predictions to a BigQuery table. - If False, then the export is not performed. - export_evaluated_data_items_bigquery_destination_uri (string): - Optional. URI of desired destination BigQuery table for exported test set predictions. - - Expected format: - ``bq://::
`` - - If not specified, then results are exported to the following auto-created BigQuery - table: - ``:export_evaluated_examples__.evaluated_examples`` - - Applies only if [export_evaluated_data_items] is True. - export_evaluated_data_items_override_destination (bool): - Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], - if the table exists, for exported test set predictions. If False, and the - table exists, then the training job will fail. - - Applies only if [export_evaluated_data_items] is True and - [export_evaluated_data_items_bigquery_destination_uri] is specified. - quantiles (List[float]): - Quantiles to use for the `minimize-quantile-loss` - [AutoMLForecastingTrainingJob.optimization_objective]. This argument is required in - this case. - - Accepts up to 5 quantiles in the form of a double from 0 to 1, exclusive. - Each quantile must be unique. - validation_options (str): - Validation options for the data validation component. The available options are: - "fail-pipeline" - (default), will validate against the validation and fail the pipeline - if it fails. - "ignore-validation" - ignore the results of the validation and continue the pipeline - budget_milli_node_hours (int): - Optional. The train budget of creating this Model, expressed in milli node - hours i.e. 1,000 value in this field means 1 node hour. - The training cost of the model will not exceed this budget. The final - cost will be attempted to be close to the budget, though may end up - being (even) noticeably smaller - at the backend's discretion. This - especially may happen when further model training ceases to provide - any improvements. - If the budget is set to a value known to be insufficient to train a - Model for the given training set, the training won't be attempted and - will error. - The minimum value is 1000 and the maximum is 72000. - model_display_name (str): - Optional. If the script produces a managed Vertex AI Model. The display name of - the Model. The name can be up to 128 characters long and can be consist - of any UTF-8 characters. - - If not provided upon creation, the job's display_name is used. - model_labels (Dict[str, str]): - Optional. The labels with user-defined metadata to - organize your Models. - Label keys and values can be no longer than 64 - characters (Unicode codepoints), can only - contain lowercase letters, numeric characters, - underscores and dashes. International characters - are allowed. - See https://goo.gl/xmQnxf for more information - and examples of labels. - additional_experiments (List[str]): - Optional. Additional experiment flags for the time series forcasting training. - create_request_timeout (float): - Optional. The timeout for the create request in seconds. - sync (bool): - Whether to execute this method synchronously. If False, this method - will be executed in concurrent Future and any downstream object will - be immediately returned and synced when the Future has completed. - Returns: - model: The trained Vertex AI Model resource or None if training did not - produce a Vertex AI Model. - - Raises: - RuntimeError: If Training job has already been run or is waiting to run. 
- """ - return super().run( dataset=dataset, target_column=target_column, @@ -5505,16 +4893,6 @@ def run( create_request_timeout=create_request_timeout, ) - @property - def evaluated_data_items_bigquery_uri(self) -> Optional[str]: - """BigQuery location of exported evaluated examples from the Training Job - Returns: - str: BigQuery uri for the exported evaluated examples if the export - feature is enabled for training. - None: If the export feature was not enabled for training. - """ - return super().evaluated_data_items_bigquery_uri - class AutoMLImageTrainingJob(_TrainingJob): _supported_training_schemas = ( From a084a21289efcf04d2ac5018e96d125d158d087d Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 27 May 2022 14:33:19 -0400 Subject: [PATCH 08/14] add seq2seq e2e tests --- .../system/aiplatform/test_e2e_forecasting.py | 53 ++++++++++++++----- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index b0f3e19711..7693767122 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -37,10 +37,7 @@ class TestEndToEndForecasting(e2e_base.TestEndToEnd): def test_end_to_end_forecasting(self, shared_state): """Builds a dataset, trains models, and gets batch predictions.""" - ds = None - automl_job = None - automl_model = None - automl_batch_prediction_job = None + resources = [] aiplatform.init( project=e2e_base._PROJECT, @@ -69,12 +66,17 @@ def test_end_to_end_forecasting(self, shared_state): } # Define both training jobs - # TODO(humichael): Add seq2seq job. automl_job = aiplatform.AutoMLForecastingTrainingJob( display_name=self._make_display_name("train-housing-automl"), optimization_objective="minimize-rmse", column_specs=column_specs, ) + seq2seq_job = aiplatform.SequenceToSequencePlusForecastingTrainingJob( + display_name=self._make_display_name("train-housing-seq2seq"), + optimization_objective="minimize-rmse", + column_specs=column_specs, + ) + resources.extend([automl_job, seq2seq_job]) # Kick off both training jobs, AutoML job will take approx one hour # to run. 
@@ -94,6 +96,23 @@ def test_end_to_end_forecasting(self, shared_state): model_display_name=self._make_display_name("automl-liquor-model"), sync=False, ) + seq2seq_model = seq2seq_job.run( + dataset=ds, + target_column=target_column, + time_column=time_column, + time_series_identifier_column=time_series_identifier_column, + available_at_forecast_columns=[time_column], + unavailable_at_forecast_columns=[target_column], + time_series_attribute_columns=["city", "zip_code", "county"], + forecast_horizon=30, + context_window=30, + data_granularity_unit="day", + data_granularity_count=1, + budget_milli_node_hours=1000, + model_display_name=self._make_display_name("seq2seq-liquor-model"), + sync=False, + ) + resources.extend([automl_model, seq2seq_model]) automl_batch_prediction_job = automl_model.batch_predict( job_display_name=self._make_display_name("automl-liquor-model"), @@ -105,8 +124,22 @@ def test_end_to_end_forecasting(self, shared_state): ), sync=False, ) + seq2seq_batch_prediction_job = seq2seq_model.batch_predict( + job_display_name=self._make_display_name("seq2seq-liquor-model"), + instances_format="bigquery", + machine_type="n1-standard-4", + bigquery_source=_PREDICTION_DATASET_BQ_PATH, + gcs_destination_prefix=( + f'gs://{shared_state["staging_bucket_name"]}/bp_results/' + ), + sync=False, + ) + resources.extend( + [automl_batch_prediction_job, seq2seq_batch_prediction_job] + ) automl_batch_prediction_job.wait() + seq2seq_batch_prediction_job.wait() assert ( automl_job.state @@ -117,11 +150,5 @@ def test_end_to_end_forecasting(self, shared_state): == job_state.JobState.JOB_STATE_SUCCEEDED ) finally: - if ds is not None: - ds.delete() - if automl_job is not None: - automl_job.delete() - if automl_model is not None: - automl_model.delete() - if automl_batch_prediction_job is not None: - automl_batch_prediction_job.delete() + for resource in resources: + resource.delete() From 4af79c7e37f21768cc2df7c6ddb27590c0e279c0 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 27 May 2022 14:52:11 -0400 Subject: [PATCH 09/14] foo --- tests/system/aiplatform/test_e2e_forecasting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index 7693767122..6f70578d8f 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -28,7 +28,6 @@ "bq://ucaip-sample-tests:ucaip_test_us_central1.2021_sales_predict" ) - @pytest.mark.usefixtures("prepare_staging_bucket", "delete_staging_bucket") class TestEndToEndForecasting(e2e_base.TestEndToEnd): """End to end system test of the Vertex SDK with forecasting data.""" @@ -151,4 +150,5 @@ def test_end_to_end_forecasting(self, shared_state): ) finally: for resource in resources: + resource.wait_for_resource_creation() resource.delete() From d68e5306832030ff92e4e1160ca4e0d3cfb680c0 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 27 May 2022 21:19:17 -0400 Subject: [PATCH 10/14] blacken --- tests/system/aiplatform/test_e2e_forecasting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index 6f70578d8f..f4614972b2 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -28,6 +28,7 @@ "bq://ucaip-sample-tests:ucaip_test_us_central1.2021_sales_predict" ) + @pytest.mark.usefixtures("prepare_staging_bucket", "delete_staging_bucket") class 
TestEndToEndForecasting(e2e_base.TestEndToEnd): """End to end system test of the Vertex SDK with forecasting data.""" From 8302756c14629c88ff19eb6cfe5a199e0f77e1c0 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 27 May 2022 21:45:36 -0400 Subject: [PATCH 11/14] fix merge conflict issues --- google/cloud/aiplatform/training_jobs.py | 33 +++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 769fffbd25..2b246e113a 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1996,6 +1996,14 @@ def run( validation_options=validation_options, model_display_name=model_display_name, model_labels=model_labels, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, sync=sync, create_request_timeout=create_request_timeout, ) @@ -2381,7 +2389,14 @@ def evaluated_data_items_bigquery_uri(self) -> Optional[str]: feature is enabled for training. None: If the export feature was not enabled for training. """ - return super().evaluated_data_items_bigquery_uri + + self._assert_gca_resource_is_available() + + metadata = self._gca_resource.training_task_metadata + if metadata and "evaluatedDataItemsBigqueryUri" in metadata: + return metadata["evaluatedDataItemsBigqueryUri"] + + return None def _add_additional_experiments(self, additional_experiments: List[str]): """Add experiment flags to the training job. 
@@ -4859,6 +4874,14 @@ def run( model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, additional_experiments: Optional[List[str]] = None, + hierarchy_group_columns: Optional[List[str]] = None, + hierarchy_group_total_weight: Optional[float] = None, + hierarchy_temporal_total_weight: Optional[float] = None, + hierarchy_group_temporal_total_weight: Optional[float] = None, + window_column: Optional[str] = None, + window_stride_length: Optional[int] = None, + window_max_count: Optional[int] = None, + holiday_regions: Optional[List[str]] = None, sync: bool = True, create_request_timeout: Optional[float] = None, ) -> models.Model: @@ -4889,6 +4912,14 @@ def run( model_display_name=model_display_name, model_labels=model_labels, additional_experiments=additional_experiments, + hierarchy_group_columns=hierarchy_group_columns, + hierarchy_group_total_weight=hierarchy_group_total_weight, + hierarchy_temporal_total_weight=hierarchy_temporal_total_weight, + hierarchy_group_temporal_total_weight=hierarchy_group_temporal_total_weight, + window_column=window_column, + window_stride_length=window_stride_length, + window_max_count=window_max_count, + holiday_regions=holiday_regions, sync=sync, create_request_timeout=create_request_timeout, ) From 6022bba1f1eb8d4ffc78019adb0b217bbebe12b4 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 3 Jun 2022 00:59:31 -0400 Subject: [PATCH 12/14] make e2e test parameterized --- .../system/aiplatform/test_e2e_forecasting.py | 84 +++++++------------ 1 file changed, 30 insertions(+), 54 deletions(-) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index f4614972b2..3f2dd6c946 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -16,6 +16,7 @@ # from google.cloud import aiplatform +from google.cloud.aiplatform import training_jobs from google.cloud.aiplatform.compat.types import job_state from google.cloud.aiplatform.compat.types import pipeline_state import pytest @@ -35,7 +36,16 @@ class TestEndToEndForecasting(e2e_base.TestEndToEnd): _temp_prefix = "temp-vertex-sdk-e2e-forecasting" - def test_end_to_end_forecasting(self, shared_state): + @pytest.mark.parametrize( + "training_job", + [ + training_jobs.AutoMLForecastingTrainingJob, + pytest.param( + training_jobs.SequenceToSequencePlusForecastingTrainingJob, + marks=pytest.mark.skip(reason="Seq2Seq not yet released.")), + ], + ) + def test_end_to_end_forecasting(self, shared_state, training_job): """Builds a dataset, trains models, and gets batch predictions.""" resources = [] @@ -45,14 +55,13 @@ def test_end_to_end_forecasting(self, shared_state): staging_bucket=shared_state["staging_bucket_name"], ) try: - # Create and import to single managed dataset for both training - # jobs. 
ds = aiplatform.TimeSeriesDataset.create( display_name=self._make_display_name("dataset"), bq_source=[_TRAINING_DATASET_BQ_PATH], sync=False, create_request_timeout=180.0, ) + resources.append(ds) time_column = "date" time_series_identifier_column = "store_name" @@ -65,22 +74,15 @@ def test_end_to_end_forecasting(self, shared_state): "county": "categorical", } - # Define both training jobs - automl_job = aiplatform.AutoMLForecastingTrainingJob( - display_name=self._make_display_name("train-housing-automl"), + job = training_job( + display_name=self._make_display_name( + "train-housing-forecasting"), optimization_objective="minimize-rmse", column_specs=column_specs, ) - seq2seq_job = aiplatform.SequenceToSequencePlusForecastingTrainingJob( - display_name=self._make_display_name("train-housing-seq2seq"), - optimization_objective="minimize-rmse", - column_specs=column_specs, - ) - resources.extend([automl_job, seq2seq_job]) + resources.append(job) - # Kick off both training jobs, AutoML job will take approx one hour - # to run. - automl_model = automl_job.run( + model = job.run( dataset=ds, target_column=target_column, time_column=time_column, @@ -93,29 +95,18 @@ def test_end_to_end_forecasting(self, shared_state): data_granularity_unit="day", data_granularity_count=1, budget_milli_node_hours=1000, - model_display_name=self._make_display_name("automl-liquor-model"), + holiday_regions=["GLOBAL"], + hierarchy_group_total_weight=1, + window_stride_length=1, + model_display_name=self._make_display_name( + "forecasting-liquor-model"), sync=False, ) - seq2seq_model = seq2seq_job.run( - dataset=ds, - target_column=target_column, - time_column=time_column, - time_series_identifier_column=time_series_identifier_column, - available_at_forecast_columns=[time_column], - unavailable_at_forecast_columns=[target_column], - time_series_attribute_columns=["city", "zip_code", "county"], - forecast_horizon=30, - context_window=30, - data_granularity_unit="day", - data_granularity_count=1, - budget_milli_node_hours=1000, - model_display_name=self._make_display_name("seq2seq-liquor-model"), - sync=False, - ) - resources.extend([automl_model, seq2seq_model]) + resources.append(model) - automl_batch_prediction_job = automl_model.batch_predict( - job_display_name=self._make_display_name("automl-liquor-model"), + batch_prediction_job = model.batch_predict( + job_display_name=self._make_display_name( + "forecasting-liquor-model"), instances_format="bigquery", machine_type="n1-standard-4", bigquery_source=_PREDICTION_DATASET_BQ_PATH, @@ -124,32 +115,17 @@ def test_end_to_end_forecasting(self, shared_state): ), sync=False, ) - seq2seq_batch_prediction_job = seq2seq_model.batch_predict( - job_display_name=self._make_display_name("seq2seq-liquor-model"), - instances_format="bigquery", - machine_type="n1-standard-4", - bigquery_source=_PREDICTION_DATASET_BQ_PATH, - gcs_destination_prefix=( - f'gs://{shared_state["staging_bucket_name"]}/bp_results/' - ), - sync=False, - ) - resources.extend( - [automl_batch_prediction_job, seq2seq_batch_prediction_job] - ) - - automl_batch_prediction_job.wait() - seq2seq_batch_prediction_job.wait() + resources.append(batch_prediction_job) + batch_prediction_job.wait() assert ( - automl_job.state + job.state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED ) assert ( - automl_batch_prediction_job.state + batch_prediction_job.state == job_state.JobState.JOB_STATE_SUCCEEDED ) finally: for resource in resources: - resource.wait_for_resource_creation() resource.delete() From 
a44d0555d4567e20cad3042f773a71883ad1eb28 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 3 Jun 2022 12:06:35 -0400 Subject: [PATCH 13/14] fix prediction format issue --- tests/system/aiplatform/test_e2e_forecasting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index 3f2dd6c946..18c981b9fe 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -108,6 +108,7 @@ def test_end_to_end_forecasting(self, shared_state, training_job): job_display_name=self._make_display_name( "forecasting-liquor-model"), instances_format="bigquery", + predictions_format="csv", machine_type="n1-standard-4", bigquery_source=_PREDICTION_DATASET_BQ_PATH, gcs_destination_prefix=( From e5a9caa51e34f1df0e7febfc369d613e7de1b8ff Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 3 Jun 2022 14:07:22 -0400 Subject: [PATCH 14/14] blacken --- .../system/aiplatform/test_e2e_forecasting.py | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/tests/system/aiplatform/test_e2e_forecasting.py b/tests/system/aiplatform/test_e2e_forecasting.py index 18c981b9fe..024946b91b 100644 --- a/tests/system/aiplatform/test_e2e_forecasting.py +++ b/tests/system/aiplatform/test_e2e_forecasting.py @@ -42,7 +42,8 @@ class TestEndToEndForecasting(e2e_base.TestEndToEnd): training_jobs.AutoMLForecastingTrainingJob, pytest.param( training_jobs.SequenceToSequencePlusForecastingTrainingJob, - marks=pytest.mark.skip(reason="Seq2Seq not yet released.")), + marks=pytest.mark.skip(reason="Seq2Seq not yet released."), + ), ], ) def test_end_to_end_forecasting(self, shared_state, training_job): @@ -75,8 +76,7 @@ def test_end_to_end_forecasting(self, shared_state, training_job): } job = training_job( - display_name=self._make_display_name( - "train-housing-forecasting"), + display_name=self._make_display_name("train-housing-forecasting"), optimization_objective="minimize-rmse", column_specs=column_specs, ) @@ -98,15 +98,13 @@ def test_end_to_end_forecasting(self, shared_state, training_job): holiday_regions=["GLOBAL"], hierarchy_group_total_weight=1, window_stride_length=1, - model_display_name=self._make_display_name( - "forecasting-liquor-model"), + model_display_name=self._make_display_name("forecasting-liquor-model"), sync=False, ) resources.append(model) batch_prediction_job = model.batch_predict( - job_display_name=self._make_display_name( - "forecasting-liquor-model"), + job_display_name=self._make_display_name("forecasting-liquor-model"), instances_format="bigquery", predictions_format="csv", machine_type="n1-standard-4", @@ -119,14 +117,8 @@ def test_end_to_end_forecasting(self, shared_state, training_job): resources.append(batch_prediction_job) batch_prediction_job.wait() - assert ( - job.state - == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED - ) - assert ( - batch_prediction_job.state - == job_state.JobState.JOB_STATE_SUCCEEDED - ) + assert job.state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + assert batch_prediction_job.state == job_state.JobState.JOB_STATE_SUCCEEDED finally: for resource in resources: resource.delete()
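
For anyone exercising this change outside the system tests, below is a minimal usage sketch of the flow that test_end_to_end_forecasting parameterizes. It is an illustration only: the project, location, staging bucket, and BigQuery table paths are placeholders, column_specs is trimmed to two entries, and the SequenceToSequencePlusForecastingTrainingJob introduced in this series can only be swapped in for the AutoML class once it is released (it is still skipped in the test via the pytest marker).

from google.cloud import aiplatform

# Placeholders: substitute a real project, region, staging bucket, and BigQuery tables.
aiplatform.init(
    project="my-project",
    location="us-central1",
    staging_bucket="gs://my-staging-bucket",
)

# Time series dataset backed by a BigQuery training table.
dataset = aiplatform.TimeSeriesDataset.create(
    display_name="liquor-sales-train",
    bq_source=["bq://my-project.my_dataset.sales_train"],
)

# AutoMLForecastingTrainingJob is available today; per this change, the new
# SequenceToSequencePlusForecastingTrainingJob exposes the same constructor and
# run() signature and can be dropped in here once released.
job = aiplatform.AutoMLForecastingTrainingJob(
    display_name="train-forecasting",
    optimization_objective="minimize-rmse",
    column_specs={"date": "timestamp", "sale_dollars": "numeric"},
)

model = job.run(
    dataset=dataset,
    target_column="sale_dollars",
    time_column="date",
    time_series_identifier_column="store_name",
    available_at_forecast_columns=["date"],
    unavailable_at_forecast_columns=["sale_dollars"],
    forecast_horizon=30,
    context_window=30,
    data_granularity_unit="day",
    data_granularity_count=1,
    budget_milli_node_hours=1000,
)

# Batch prediction mirroring the system test: BigQuery source in, CSV results to GCS.
batch_prediction_job = model.batch_predict(
    job_display_name="forecasting-batch-predict",
    instances_format="bigquery",
    predictions_format="csv",
    machine_type="n1-standard-4",
    bigquery_source="bq://my-project.my_dataset.sales_predict",
    gcs_destination_prefix="gs://my-staging-bucket/bp_results/",
)
batch_prediction_job.wait()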