From 4ba434287a0a25f027e3b63a80f8881a9b16723e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Jan 2024 14:08:58 -0600 Subject: [PATCH 1/2] fix: `query_and_wait` now retains unknown query configuration `_properties` (#1793) * fix: `query_and_wait` now retains unknown query configuration `_properties` fix: raise `ValueError` in `query_and_wait` with wrong `job_config` type --- google/cloud/bigquery/_job_helpers.py | 24 +++++---- tests/unit/test__job_helpers.py | 75 +++++++++++++++++++++++---- 2 files changed, 79 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 7356331b8f..6debc377b7 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -166,6 +166,14 @@ def do_query(): return future +def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): + """Catch common mistakes, such as passing in a *JobConfig object of the + wrong type. + """ + if invalid_key in request_body: + raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -179,17 +187,15 @@ def _to_query_request( QueryRequest. If any configuration property is set that is not available in jobs.query, it will result in a server-side error. """ - request_body = {} - job_config_resource = job_config.to_api_repr() if job_config else {} - query_config_resource = job_config_resource.get("query", {}) + request_body = copy.copy(job_config.to_api_repr()) if job_config else {} - request_body.update(query_config_resource) + _validate_job_config(request_body, job.CopyJob._JOB_TYPE) + _validate_job_config(request_body, job.ExtractJob._JOB_TYPE) + _validate_job_config(request_body, job.LoadJob._JOB_TYPE) - # These keys are top level in job resource and query resource. - if "labels" in job_config_resource: - request_body["labels"] = job_config_resource["labels"] - if "dryRun" in job_config_resource: - request_body["dryRun"] = job_config_resource["dryRun"] + # Move query.* properties to top-level. + query_config_resource = request_body.pop("query", {}) + request_body.update(query_config_resource) # Default to standard SQL. request_body.setdefault("useLegacySql", False) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index f2fe32d94c..404a546ff1 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -23,6 +23,9 @@ from google.cloud.bigquery.client import Client from google.cloud.bigquery import _job_helpers +from google.cloud.bigquery.job import copy_ as job_copy +from google.cloud.bigquery.job import extract as job_extract +from google.cloud.bigquery.job import load as job_load from google.cloud.bigquery.job import query as job_query from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter @@ -57,9 +60,34 @@ def make_query_response( @pytest.mark.parametrize( ("job_config", "expected"), ( - (None, make_query_request()), - (job_query.QueryJobConfig(), make_query_request()), - ( + pytest.param( + None, + make_query_request(), + id="job_config=None-default-request", + ), + pytest.param( + job_query.QueryJobConfig(), + make_query_request(), + id="job_config=QueryJobConfig()-default-request", + ), + pytest.param( + job_query.QueryJobConfig.from_api_repr( + { + "unknownTopLevelProperty": "some-test-value", + "query": { + "unknownQueryProperty": "some-other-value", + }, + }, + ), + make_query_request( + { + "unknownTopLevelProperty": "some-test-value", + "unknownQueryProperty": "some-other-value", + } + ), + id="job_config-with-unknown-properties-includes-all-properties-in-request", + ), + pytest.param( job_query.QueryJobConfig(default_dataset="my-project.my_dataset"), make_query_request( { @@ -69,17 +97,24 @@ def make_query_response( } } ), + id="job_config-with-default_dataset", ), - (job_query.QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})), - ( + pytest.param( + job_query.QueryJobConfig(dry_run=True), + make_query_request({"dryRun": True}), + id="job_config-with-dry_run", + ), + pytest.param( job_query.QueryJobConfig(use_query_cache=False), make_query_request({"useQueryCache": False}), + id="job_config-with-use_query_cache", ), - ( + pytest.param( job_query.QueryJobConfig(use_legacy_sql=True), make_query_request({"useLegacySql": True}), + id="job_config-with-use_legacy_sql", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter("named_param1", "STRING", "param-value"), @@ -103,8 +138,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-named", ), - ( + pytest.param( job_query.QueryJobConfig( query_parameters=[ ScalarQueryParameter(None, "STRING", "param-value"), @@ -126,8 +162,9 @@ def make_query_response( ], } ), + id="job_config-with-query_parameters-positional", ), - ( + pytest.param( job_query.QueryJobConfig( connection_properties=[ ConnectionProperty(key="time_zone", value="America/Chicago"), @@ -142,14 +179,17 @@ def make_query_response( ] } ), + id="job_config-with-connection_properties", ), - ( + pytest.param( job_query.QueryJobConfig(labels={"abc": "def"}), make_query_request({"labels": {"abc": "def"}}), + id="job_config-with-labels", ), - ( + pytest.param( job_query.QueryJobConfig(maximum_bytes_billed=987654), make_query_request({"maximumBytesBilled": "987654"}), + id="job_config-with-maximum_bytes_billed", ), ), ) @@ -159,6 +199,19 @@ def test__to_query_request(job_config, expected): assert result == expected +@pytest.mark.parametrize( + ("job_config", "invalid_key"), + ( + pytest.param(job_copy.CopyJobConfig(), "copy", id="copy"), + pytest.param(job_extract.ExtractJobConfig(), "extract", id="extract"), + pytest.param(job_load.LoadJobConfig(), "load", id="load"), + ), +) +def test__to_query_request_raises_for_invalid_config(job_config, invalid_key): + with pytest.raises(ValueError, match=f"{repr(invalid_key)} in job_config"): + _job_helpers._to_query_request(job_config, query="SELECT 1") + + def test__to_query_job_defaults(): mock_client = mock.create_autospec(Client) response = make_query_response( From 6559dde1568b2d07c1e0a142194f2b370efb8983 Mon Sep 17 00:00:00 2001 From: Gaurang Shah Date: Tue, 23 Jan 2024 19:03:36 -0500 Subject: [PATCH 2/2] feature: add query location for bigquery magic (#1771) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/magics/magics.py | 11 +++++++++++ tests/unit/test_magics.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2a3583c66a..b7c685d9a8 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -508,6 +508,15 @@ def _create_dataset_if_necessary(client, dataset_id): "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature." ), ) +@magic_arguments.argument( + "--location", + type=str, + default=None, + help=( + "Set the location to execute query." + "Defaults to location set in query setting in console." + ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -551,6 +560,7 @@ def _cell_magic(line, query): category=DeprecationWarning, ) use_bqstorage_api = not args.use_rest_api + location = args.location params = [] if params_option_value: @@ -579,6 +589,7 @@ def _cell_magic(line, query): default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), client_options=bigquery_client_options, + location=location, ) if context._connection: client._connection = context._connection diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index b038940951..1511cba9c9 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -2053,3 +2053,21 @@ def test_bigquery_magic_create_dataset_fails(): ) assert close_transports.called + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_location(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic("bigquery", "--location=us-east1", "SELECT 17 AS num") + + client_options_used = run_query_mock.call_args_list[0][0][0] + assert client_options_used.location == "us-east1"