From 4e7175f6c4a2a4c8fbceae4c351578829ef88519 Mon Sep 17 00:00:00 2001
From: Ivan Cheung
Date: Mon, 22 Aug 2022 12:28:37 -0400
Subject: [PATCH] docs(samples): Added seq2seq sample (#1595)

* feat: Added seq2seq sample

* Fixed typehint

* Fixed test and mocks

* Fixed lint issues
---
Notes (review annotations; this area sits between the three-dash line and
the diffstat, outside the commit message and outside every hunk):

    What the patch does
      * conftest.py: adds two fixtures. The first patches
        aiplatform.SequenceToSequencePlusForecastingTrainingJob so that
        constructing it returns the shared mock_forecasting_training_job;
        the second patches that mock's `run` method.
      * New sample create_training_pipeline_forecasting_seq2seq_sample():
        calls aiplatform.init, builds the job with
        optimization_objective="minimize-rmse", opens the
        TimeSeriesDataset by id, forwards every remaining argument to
        job.run(), calls model.wait(), prints display_name,
        resource_name and uri, and returns the model.
      * New test: invokes the sample with values from test_constants and
        asserts each mock was called exactly once with the expected
        keyword arguments.
      * Two existing samples (forecasting, tabular regression): the
        default of model_display_name changes from None to "my_model".

    Things to keep in mind when touching this code
      * The test does not pass time_column, time_series_identifier_column,
        unavailable_at_forecast_columns, available_at_forecast_columns,
        forecast_horizon, data_granularity_unit, data_granularity_count
        or sync, yet asserts them on run(). It therefore only passes
        while the corresponding test_constants values equal the sample's
        defaults (sync=True is asserted literally).
      * unavailable_at_forecast_columns, available_at_forecast_columns
        and time_series_attribute_columns default to `[]` literals. The
        sample only forwards them to run() and never mutates them, so
        the shared-default pitfall does not trigger in the visible code.
      * The FORECASTNG_* spelling in the test presumably matches names
        already defined in test_constants (not visible in this patch);
        confirm before renaming.

 samples/model-builder/conftest.py             | 13 +++
 ...te_training_pipeline_forecasting_sample.py |  2 +-
 ...ing_pipeline_forecasting_seq2seq_sample.py | 98 +++++++++++++++++++
 ...ipeline_forecasting_seq2seq_sample_test.py | 85 ++++++++++++++++
 ...ning_pipeline_tabular_regression_sample.py |  2 +-
 5 files changed, 198 insertions(+), 2 deletions(-)
 create mode 100644 samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample.py
 create mode 100644 samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample_test.py

diff --git a/samples/model-builder/conftest.py b/samples/model-builder/conftest.py
index e312268b65..d9328b042d 100644
--- a/samples/model-builder/conftest.py
+++ b/samples/model-builder/conftest.py
@@ -249,6 +249,19 @@ def mock_run_automl_forecasting_training_job(mock_forecasting_training_job):
         yield mock
 
 
+@pytest.fixture
+def mock_get_automl_forecasting_seq2seq_training_job(mock_forecasting_training_job):
+    with patch.object(aiplatform, "SequenceToSequencePlusForecastingTrainingJob") as mock:
+        mock.return_value = mock_forecasting_training_job
+        yield mock
+
+
+@pytest.fixture
+def mock_run_automl_forecasting_seq2seq_training_job(mock_forecasting_training_job):
+    with patch.object(mock_forecasting_training_job, "run") as mock:
+        yield mock
+
+
 @pytest.fixture
 def mock_get_automl_image_training_job(mock_image_training_job):
     with patch.object(aiplatform, "AutoMLImageTrainingJob") as mock:
diff --git a/samples/model-builder/create_training_pipeline_forecasting_sample.py b/samples/model-builder/create_training_pipeline_forecasting_sample.py
index 0b710e894b..81b2f593e8 100644
--- a/samples/model-builder/create_training_pipeline_forecasting_sample.py
+++ b/samples/model-builder/create_training_pipeline_forecasting_sample.py
@@ -23,7 +23,7 @@ def create_training_pipeline_forecasting_sample(
     display_name: str,
     dataset_id: str,
     location: str = "us-central1",
-    model_display_name: str = None,
+    model_display_name: str = "my_model",
     target_column: str = "target_column",
     time_column: str = "date",
     time_series_identifier_column: str = "time_series_id",
diff --git a/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample.py b/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample.py
new file mode 100644
index 0000000000..f6bc524683
--- /dev/null
+++ b/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample.py
@@ -0,0 +1,98 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional
+
+from google.cloud import aiplatform
+
+
+# [START aiplatform_sdk_create_training_pipeline_forecasting_seq2seq_sample]
+def create_training_pipeline_forecasting_seq2seq_sample(
+    project: str,
+    display_name: str,
+    dataset_id: str,
+    location: str = "us-central1",
+    model_display_name: str = "my_model",
+    target_column: str = "target_column",
+    time_column: str = "date",
+    time_series_identifier_column: str = "time_series_id",
+    unavailable_at_forecast_columns: List[str] = [],
+    available_at_forecast_columns: List[str] = [],
+    forecast_horizon: int = 1,
+    data_granularity_unit: str = "week",
+    data_granularity_count: int = 1,
+    training_fraction_split: float = 0.8,
+    validation_fraction_split: float = 0.1,
+    test_fraction_split: float = 0.1,
+    budget_milli_node_hours: int = 8000,
+    timestamp_split_column_name: str = "timestamp_split",
+    weight_column: str = "weight",
+    time_series_attribute_columns: List[str] = [],
+    context_window: int = 0,
+    export_evaluated_data_items: bool = False,
+    export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None,
+    export_evaluated_data_items_override_destination: bool = False,
+    quantiles: Optional[List[float]] = None,
+    validation_options: Optional[str] = None,
+    predefined_split_column_name: Optional[str] = None,
+    sync: bool = True,
+):
+    aiplatform.init(project=project, location=location)
+
+    # Create training job
+    forecasting_seq2seq_job = aiplatform.SequenceToSequencePlusForecastingTrainingJob(
+        display_name=display_name, optimization_objective="minimize-rmse"
+    )
+
+    # Retrieve existing dataset
+    dataset = aiplatform.TimeSeriesDataset(dataset_id)
+
+    # Run training job
+    model = forecasting_seq2seq_job.run(
+        dataset=dataset,
+        target_column=target_column,
+        time_column=time_column,
+        time_series_identifier_column=time_series_identifier_column,
+        unavailable_at_forecast_columns=unavailable_at_forecast_columns,
+        available_at_forecast_columns=available_at_forecast_columns,
+        forecast_horizon=forecast_horizon,
+        data_granularity_unit=data_granularity_unit,
+        data_granularity_count=data_granularity_count,
+        training_fraction_split=training_fraction_split,
+        validation_fraction_split=validation_fraction_split,
+        test_fraction_split=test_fraction_split,
+        predefined_split_column_name=predefined_split_column_name,
+        timestamp_split_column_name=timestamp_split_column_name,
+        weight_column=weight_column,
+        time_series_attribute_columns=time_series_attribute_columns,
+        context_window=context_window,
+        export_evaluated_data_items=export_evaluated_data_items,
+        export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri,
+        export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination,
+        quantiles=quantiles,
+        validation_options=validation_options,
+        budget_milli_node_hours=budget_milli_node_hours,
+        model_display_name=model_display_name,
+        sync=sync,
+    )
+
+    model.wait()
+
+    print(model.display_name)
+    print(model.resource_name)
+    print(model.uri)
+    return model
+
+
+# [END aiplatform_sdk_create_training_pipeline_forecasting_seq2seq_sample]
diff --git a/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample_test.py b/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample_test.py
new file mode 100644
index 0000000000..34a7eb7685
--- /dev/null
+++ b/samples/model-builder/create_training_pipeline_forecasting_seq2seq_sample_test.py
@@ -0,0 +1,85 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import create_training_pipeline_forecasting_seq2seq_sample
+import test_constants as constants
+
+
+def test_create_training_pipeline_forecasting_seq2seq_sample(
+    mock_sdk_init,
+    mock_time_series_dataset,
+    mock_get_automl_forecasting_seq2seq_training_job,
+    mock_run_automl_forecasting_seq2seq_training_job,
+    mock_get_time_series_dataset,
+):
+
+    create_training_pipeline_forecasting_seq2seq_sample.create_training_pipeline_forecasting_seq2seq_sample(
+        project=constants.PROJECT,
+        display_name=constants.DISPLAY_NAME,
+        dataset_id=constants.RESOURCE_ID,
+        model_display_name=constants.DISPLAY_NAME_2,
+        target_column=constants.TABULAR_TARGET_COLUMN,
+        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
+        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
+        test_fraction_split=constants.TEST_FRACTION_SPLIT,
+        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
+        timestamp_split_column_name=constants.TIMESTAMP_SPLIT_COLUMN_NAME,
+        weight_column=constants.WEIGHT_COLUMN,
+        time_series_attribute_columns=constants.TIME_SERIES_ATTRIBUTE_COLUMNS,
+        context_window=constants.CONTEXT_WINDOW,
+        export_evaluated_data_items=constants.EXPORT_EVALUATED_DATA_ITEMS,
+        export_evaluated_data_items_bigquery_destination_uri=constants.EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
+        export_evaluated_data_items_override_destination=constants.EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
+        quantiles=constants.QUANTILES,
+        validation_options=constants.VALIDATION_OPTIONS,
+        predefined_split_column_name=constants.PREDEFINED_SPLIT_COLUMN_NAME,
+    )
+
+    mock_get_time_series_dataset.assert_called_once_with(constants.RESOURCE_ID)
+
+    mock_sdk_init.assert_called_once_with(
+        project=constants.PROJECT, location=constants.LOCATION
+    )
+    mock_get_automl_forecasting_seq2seq_training_job.assert_called_once_with(
+        display_name=constants.DISPLAY_NAME,
+        optimization_objective="minimize-rmse",
+    )
+    mock_run_automl_forecasting_seq2seq_training_job.assert_called_once_with(
+        dataset=mock_time_series_dataset,
+        target_column=constants.TABULAR_TARGET_COLUMN,
+        time_column=constants.FORECASTNG_TIME_COLUMN,
+        time_series_identifier_column=constants.FORECASTNG_TIME_SERIES_IDENTIFIER_COLUMN,
+        unavailable_at_forecast_columns=constants.FORECASTNG_UNAVAILABLE_AT_FORECAST_COLUMNS,
+        available_at_forecast_columns=constants.FORECASTNG_AVAILABLE_AT_FORECAST_COLUMNS,
+        forecast_horizon=constants.FORECASTNG_FORECAST_HORIZON,
+        data_granularity_unit=constants.DATA_GRANULARITY_UNIT,
+        data_granularity_count=constants.DATA_GRANULARITY_COUNT,
+        training_fraction_split=constants.TRAINING_FRACTION_SPLIT,
+        validation_fraction_split=constants.VALIDATION_FRACTION_SPLIT,
+        test_fraction_split=constants.TEST_FRACTION_SPLIT,
+        budget_milli_node_hours=constants.BUDGET_MILLI_NODE_HOURS_8000,
+        model_display_name=constants.DISPLAY_NAME_2,
+        timestamp_split_column_name=constants.TIMESTAMP_SPLIT_COLUMN_NAME,
+        weight_column=constants.WEIGHT_COLUMN,
+        time_series_attribute_columns=constants.TIME_SERIES_ATTRIBUTE_COLUMNS,
+        context_window=constants.CONTEXT_WINDOW,
+        export_evaluated_data_items=constants.EXPORT_EVALUATED_DATA_ITEMS,
+        export_evaluated_data_items_bigquery_destination_uri=constants.EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
+        export_evaluated_data_items_override_destination=constants.EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
+        quantiles=constants.QUANTILES,
+        validation_options=constants.VALIDATION_OPTIONS,
+        predefined_split_column_name=constants.PREDEFINED_SPLIT_COLUMN_NAME,
+        sync=True,
+    )
diff --git a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py
index 9a3524f234..963c191547 100644
--- a/samples/model-builder/create_training_pipeline_tabular_regression_sample.py
+++ b/samples/model-builder/create_training_pipeline_tabular_regression_sample.py
@@ -21,7 +21,7 @@ def create_training_pipeline_tabular_regression_sample(
     display_name: str,
     dataset_id: str,
     location: str = "us-central1",
-    model_display_name: str = None,
+    model_display_name: str = "my_model",
     target_column: str = "target_column",
     training_fraction_split: float = 0.8,
     validation_fraction_split: float = 0.1,