diff --git a/bigquery/docs/snippets.py b/bigquery/docs/snippets.py index afa60f2dc440f..6da7ab13e657d 100644 --- a/bigquery/docs/snippets.py +++ b/bigquery/docs/snippets.py @@ -87,27 +87,199 @@ def delete(self): self._wrapped.close() -def dataset_exists(client, dataset_reference): - """Return if a dataset exists. +def test_create_client_default_credentials(): + """Create a BigQuery client with Application Default Credentials""" - Args: - client (google.cloud.bigquery.client.Client): - A client to connect to the BigQuery API. - dataset_reference (google.cloud.bigquery.dataset.DatasetReference): - A reference to the dataset to look for. + # [START bigquery_client_default_credentials] + from google.cloud import bigquery - Returns: - bool: ``True`` if the dataset exists, ``False`` otherwise. + # If you don't specify credentials when constructing the client, the + # client library will look for credentials in the environment. + client = bigquery.Client() + # [END bigquery_client_default_credentials] + + assert client is not None + + +def test_create_table_nested_repeated_schema(client, to_delete): + dataset_id = "create_table_nested_repeated_{}".format(_millis()) + dataset_ref = client.dataset(dataset_id) + dataset = bigquery.Dataset(dataset_ref) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_nested_repeated_schema] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + schema = [ + bigquery.SchemaField("id", "STRING", mode="NULLABLE"), + bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"), + bigquery.SchemaField("dob", "DATE", mode="NULLABLE"), + bigquery.SchemaField( + "addresses", + "RECORD", + mode="REPEATED", + fields=[ + bigquery.SchemaField("status", "STRING", mode="NULLABLE"), + bigquery.SchemaField("address", "STRING", mode="NULLABLE"), + bigquery.SchemaField("city", "STRING", mode="NULLABLE"), + bigquery.SchemaField("state", "STRING", mode="NULLABLE"), + bigquery.SchemaField("zip", "STRING", mode="NULLABLE"), + bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"), + ], + ), + ] + table_ref = dataset_ref.table("my_table") + table = bigquery.Table(table_ref, schema=schema) + table = client.create_table(table) # API request + + print("Created table {}".format(table.full_table_id)) + # [END bigquery_nested_repeated_schema] + + +def test_create_table_cmek(client, to_delete): + dataset_id = "create_table_cmek_{}".format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_create_table_cmek] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + + table_ref = client.dataset(dataset_id).table("my_table") + table = bigquery.Table(table_ref) + + # Set the encryption key to use for the table. + # TODO: Replace this key with a key you have created in Cloud KMS. 
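# Note: a Cloud KMS key resource name follows the fixed pattern
# "projects/PROJECT_ID/locations/LOCATION/keyRings/KEY_RING/cryptoKeys/KEY_NAME";
# the four format() arguments below fill those placeholders in order.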
+ kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format( + "cloud-samples-tests", "us-central1", "test", "test" + ) + table.encryption_configuration = bigquery.EncryptionConfiguration( + kms_key_name=kms_key_name + ) + + table = client.create_table(table) # API request + + assert table.encryption_configuration.kms_key_name == kms_key_name + # [END bigquery_create_table_cmek] + + +def test_create_partitioned_table(client, to_delete): + dataset_id = "create_table_partitioned_{}".format(_millis()) + dataset_ref = bigquery.Dataset(client.dataset(dataset_id)) + dataset = client.create_dataset(dataset_ref) + to_delete.append(dataset) + + # [START bigquery_create_table_partitioned] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_ref = client.dataset('my_dataset') + + table_ref = dataset_ref.table("my_partitioned_table") + schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ] + table = bigquery.Table(table_ref, schema=schema) + table.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # name of column to use for partitioning + expiration_ms=7776000000, + ) # 90 days + + table = client.create_table(table) + + print( + "Created table {}, partitioned on column {}".format( + table.table_id, table.time_partitioning.field + ) + ) + # [END bigquery_create_table_partitioned] + + assert table.time_partitioning.type_ == "DAY" + assert table.time_partitioning.field == "date" + assert table.time_partitioning.expiration_ms == 7776000000 + + +def test_load_and_query_partitioned_table(client, to_delete): + dataset_id = "load_partitioned_table_dataset_{}".format(_millis()) + dataset = bigquery.Dataset(client.dataset(dataset_id)) + client.create_dataset(dataset) + to_delete.append(dataset) + + # [START bigquery_load_table_partitioned] + # from google.cloud import bigquery + # client = bigquery.Client() + # dataset_id = 'my_dataset' + table_id = "us_states_by_date" + + dataset_ref = client.dataset(dataset_id) + job_config = bigquery.LoadJobConfig() + job_config.schema = [ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + bigquery.SchemaField("date", "DATE"), + ] + job_config.skip_leading_rows = 1 + job_config.time_partitioning = bigquery.TimePartitioning( + type_=bigquery.TimePartitioningType.DAY, + field="date", # name of column to use for partitioning + expiration_ms=7776000000, + ) # 90 days + uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv" + + load_job = client.load_table_from_uri( + uri, dataset_ref.table(table_id), job_config=job_config + ) # API request + + assert load_job.job_type == "load" + + load_job.result() # Waits for table load to complete. 
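# Note: result() blocks until the job finishes and re-raises any job error;
# while waiting, load_job.state and load_job.errors can be inspected, and
# load_job.output_rows reports the written row count once the job is done.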
+ + table = client.get_table(dataset_ref.table(table_id)) + print("Loaded {} rows to table {}".format(table.num_rows, table_id)) + # [END bigquery_load_table_partitioned] + assert table.num_rows == 50 + + project_id = client.project + + # [START bigquery_query_partitioned_table] + import datetime + + # from google.cloud import bigquery + # client = bigquery.Client() + # project_id = 'my-project' + # dataset_id = 'my_dataset' + table_id = "us_states_by_date" + + sql_template = """ + SELECT * + FROM `{}.{}.{}` + WHERE date BETWEEN @start_date AND @end_date """ - from google.cloud.exceptions import NotFound + sql = sql_template.format(project_id, dataset_id, table_id) + job_config = bigquery.QueryJobConfig() + job_config.query_parameters = [ + bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)), + bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)), + ] - try: - client.get_dataset(dataset_reference) - return True - except NotFound: - return False + # API request + query_job = client.query(sql, job_config=job_config) + rows = list(query_job) + print("{} states were admitted to the US in the 1800s".format(len(rows))) + # [END bigquery_query_partitioned_table] + assert len(rows) == 29 + +# [START bigquery_table_exists] def table_exists(client, table_reference): """Return if a table exists. @@ -289,43 +461,6 @@ def test_update_table_expiration(client, to_delete): # [END bigquery_update_table_expiration] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_add_empty_column(client, to_delete): - """Adds an empty column to an existing table.""" - dataset_id = "add_empty_column_dataset_{}".format(_millis()) - table_id = "add_empty_column_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) - - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # [START bigquery_add_empty_column] - # from google.cloud import bigquery - # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - - table_ref = client.dataset(dataset_id).table(table_id) - table = client.get_table(table_ref) # API request - - original_schema = table.schema - new_schema = original_schema[:] # creates a copy of the schema - new_schema.append(bigquery.SchemaField("phone", "STRING")) - - table.schema = new_schema - table = client.update_table(table, ["schema"]) # API request - - assert len(table.schema) == len(original_schema) + 1 == len(new_schema) - # [END bigquery_add_empty_column] - - @pytest.mark.skip( reason=( "update_table() is flaky " @@ -414,47 +549,6 @@ def test_update_table_cmek(client, to_delete): # [END bigquery_update_table_cmek] -def test_browse_table_data(client, to_delete, capsys): - """Retreive selected row data from a table.""" - - # [START bigquery_browse_table] - # from google.cloud import bigquery - # client = bigquery.Client() - - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = dataset_ref.table("shakespeare") - table = client.get_table(table_ref) # API call - - # Load all rows from a table - rows = client.list_rows(table) - assert len(list(rows)) == table.num_rows - - # Load the first 10 rows - rows = client.list_rows(table, max_results=10) - assert len(list(rows)) == 10 - - # Specify selected fields to limit the results to certain columns - 
fields = table.schema[:2] # first two columns - rows = client.list_rows(table, selected_fields=fields, max_results=10) - assert len(rows.schema) == 2 - assert len(list(rows)) == 10 - - # Use the start index to load an arbitrary portion of the table - rows = client.list_rows(table, start_index=10, max_results=10) - - # Print row data in tabular format - format_string = "{!s:<16} " * len(rows.schema) - field_names = [field.name for field in rows.schema] - print(format_string.format(*field_names)) # prints column headers - for row in rows: - print(format_string.format(*row)) # prints row data - # [END bigquery_browse_table] - - out, err = capsys.readouterr() - out = list(filter(bool, out.split("\n"))) # list of non-blank lines - assert len(out) == 11 - - @pytest.mark.skip( reason=( "update_table() is flaky " diff --git a/bigquery/docs/usage/datasets.rst b/bigquery/docs/usage/datasets.rst index 97348f015c626..96650c5796a8f 100644 --- a/bigquery/docs/usage/datasets.rst +++ b/bigquery/docs/usage/datasets.rst @@ -28,7 +28,7 @@ List datasets for a project with the List datasets by label for a project with the :func:`~google.cloud.bigquery.client.Client.list_datasets` method: -.. literalinclude:: ../samples/samples/list_datasets_by_label.py +.. literalinclude:: ../samples/list_datasets_by_label.py :language: python :dedent: 4 :start-after: [START bigquery_list_datasets_by_label] :end-before: [END bigquery_list_datasets_by_label] @@ -46,6 +46,15 @@ Get a dataset resource (to pick up changes made by another client) with the :start-after: [START bigquery_get_dataset] :end-before: [END bigquery_get_dataset] +Determine if a dataset exists with the +:func:`~google.cloud.bigquery.client.Client.get_dataset` method: + +.. literalinclude:: ../samples/dataset_exists.py + :language: python + :dedent: 4 + :start-after: [START bigquery_dataset_exists] + :end-before: [END bigquery_dataset_exists] + Creating a Dataset ^^^^^^^^^^^^^^^^^^ @@ -79,6 +88,36 @@ Modify user permissions on a dataset with the :start-after: [START bigquery_update_dataset_access] :end-before: [END bigquery_update_dataset_access] +Managing Dataset Labels +^^^^^^^^^^^^^^^^^^^^^^^ + +Add labels to a dataset with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../samples/label_dataset.py + :language: python + :dedent: 4 + :start-after: [START bigquery_label_dataset] + :end-before: [END bigquery_label_dataset] + +Get a dataset's labels with the +:func:`~google.cloud.bigquery.client.Client.get_dataset` method: + +.. literalinclude:: ../samples/get_dataset_labels.py + :language: python + :dedent: 4 + :start-after: [START bigquery_get_dataset_labels] + :end-before: [END bigquery_get_dataset_labels] + +Delete a dataset's labels with the +:func:`~google.cloud.bigquery.client.Client.update_dataset` method: + +.. literalinclude:: ../samples/delete_dataset_labels.py + :language: python + :dedent: 4 + :start-after: [START bigquery_delete_label_dataset] + :end-before: [END bigquery_delete_label_dataset] + Deleting a Dataset ^^^^^^^^^^^^^^^^^^ diff --git a/bigquery/docs/usage/tables.rst b/bigquery/docs/usage/tables.rst index 4bffe3fc11bbf..07b5604cf8e65 100644 --- a/bigquery/docs/usage/tables.rst +++ b/bigquery/docs/usage/tables.rst @@ -113,6 +113,15 @@ Insert rows into a table's data with the :start-after: [START bigquery_table_insert_rows] :end-before: [END bigquery_table_insert_rows] +Add an empty column to an existing table with the +:func:`~google.cloud.bigquery.client.Client.update_table` method: + +..
literalinclude:: ../samples/add_empty_column.py + :language: python + :dedent: 4 + :start-after: [START bigquery_add_empty_column] + :end-before: [END bigquery_add_empty_column] + Copying a Table ^^^^^^^^^^^^^^^ diff --git a/bigquery/samples/add_empty_column.py b/bigquery/samples/add_empty_column.py index 29d6241f85910..eb84037598d3c 100644 --- a/bigquery/samples/add_empty_column.py +++ b/bigquery/samples/add_empty_column.py @@ -13,26 +13,18 @@ # limitations under the License. -def add_empty_column(client, to_delete): +def add_empty_column(client, table_id): # [START bigquery_add_empty_column] - """Adds an empty column to an existing table.""" - dataset_id = "add_empty_column_dataset_{}".format(_millis()) - table_id = "add_empty_column_table_{}".format(_millis()) - dataset = bigquery.Dataset(client.dataset(dataset_id)) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) + from google.cloud import bigquery - table = bigquery.Table(dataset.table(table_id), schema=SCHEMA) - table = client.create_table(table) - - # from google.cloud import bigquery + # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() - # dataset_id = 'my_dataset' - # table_id = 'my_table' - table_ref = client.dataset(dataset_id).table(table_id) - table = client.get_table(table_ref) # API request + # TODO(developer): Set table_id to the ID of the table to add an empty column. + # table_id = "your-project.your_dataset.your_table_name" + + table = client.get_table(table_id) original_schema = table.schema new_schema = original_schema[:] # creates a copy of the schema @@ -41,6 +33,8 @@ def add_empty_column(client, to_delete): table.schema = new_schema table = client.update_table(table, ["schema"]) # API request - assert len(table.schema) == len(original_schema) + 1 == len(new_schema) - - # [END bigquery_add_empty_column] \ No newline at end of file + if len(table.schema) == len(original_schema) + 1 == len(new_schema): + print("A new column has been added.") + else: + print("The column has not been added.") + # [END bigquery_add_empty_column] diff --git a/bigquery/samples/browse_table_data.py b/bigquery/samples/browse_table_data.py index 35dda3d555822..dd6c572cab6da 100644 --- a/bigquery/samples/browse_table_data.py +++ b/bigquery/samples/browse_table_data.py @@ -13,44 +13,35 @@ # limitations under the License. -def browse_table_data(client, to_delete, capsys): +def browse_table_data(client, table_id): # [START bigquery_browse_table] - """Retreive selected row data from a table.""" - + # TODO(developer): Import the client library. # from google.cloud import bigquery - # client = bigquery.Client() - - dataset_ref = client.dataset("samples", project="bigquery-public-data") - table_ref = dataset_ref.table("shakespeare") - table = client.get_table(table_ref) # API call - - # Load all rows from a table - rows = client.list_rows(table) - assert len(list(rows)) == table.num_rows - # Load the first 10 rows - rows = client.list_rows(table, max_results=10) - assert len(list(rows)) == 10 + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() - # Specify selected fields to limit the results to certain columns - fields = table.schema[:2] # first two columns - rows = client.list_rows(table, selected_fields=fields, max_results=10) - assert len(rows.schema) == 2 - assert len(list(rows)) == 10 + # TODO(developer): Set table_id to the ID of the table to browse data rows. 
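# Note: in this sample list_rows() is given a fully qualified
# "project.dataset.table" string; a TableReference or Table object works too.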
+ # table_id = "your-project.your_dataset.your_table_name" - # Use the start index to load an arbitrary portion of the table - rows = client.list_rows(table, start_index=10, max_results=10) + # Download all rows from a table. + rows_iter = client.list_rows(table_id) - # Print row data in tabular format - format_string = "{!s:<16} " * len(rows.schema) - field_names = [field.name for field in rows.schema] - print(format_string.format(*field_names)) # prints column headers - for row in rows: - print(format_string.format(*row)) # prints row data + # Iterate over rows to make the API requests to fetch row data. + rows = list(rows_iter) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) - out, err = capsys.readouterr() - out = list(filter(bool, out.split("\n"))) # list of non-blank lines - assert len(out) == 11 + # Download at most 10 rows. + rows_iter = client.list_rows(table_id, max_results=10) + rows = list(rows_iter) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) - # [END bigquery_browse_table] \ No newline at end of file + # Specify selected fields to limit the results to certain columns. + table = client.get_table(table_id) + fields = table.schema[:2] # first two columns + rows_iter = client.list_rows(table_id, selected_fields=fields, max_results=10) + rows = list(rows_iter) + print("Selected {} columns from table {}.".format(len(rows_iter.schema), table_id)) + print("Downloaded {} rows from table {}".format(len(rows), table_id)) + # [END bigquery_browse_table] diff --git a/bigquery/samples/create_job.py b/bigquery/samples/create_job.py index 7570dc49f1d29..24bb85510598f 100644 --- a/bigquery/samples/create_job.py +++ b/bigquery/samples/create_job.py @@ -14,6 +14,7 @@ def create_job(client): + # [START bigquery_create_job] from google.cloud import bigquery diff --git a/bigquery/samples/create_routine.py b/bigquery/samples/create_routine.py index 18b999980d724..c08ec4799a3e9 100644 --- a/bigquery/samples/create_routine.py +++ b/bigquery/samples/create_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def create_routine(client, routine_id): + # [START bigquery_create_routine] from google.cloud import bigquery from google.cloud import bigquery_v2 diff --git a/bigquery/samples/create_routine_ddl.py b/bigquery/samples/create_routine_ddl.py index aa6254b1139a3..a4ae3318e7b42 100644 --- a/bigquery/samples/create_routine_ddl.py +++ b/bigquery/samples/create_routine_ddl.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def create_routine_ddl(client, routine_id): + # [START bigquery_create_routine_ddl] # TODO(developer): Import the client library. # from google.cloud import bigquery diff --git a/bigquery/samples/create_table.py b/bigquery/samples/create_table.py index 5e2e34d41d997..2a6e98fc72f62 100644 --- a/bigquery/samples/create_table.py +++ b/bigquery/samples/create_table.py @@ -18,17 +18,17 @@ def create_table(client, table_id): # [START bigquery_create_table] from google.cloud import bigquery - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - # TODO(developer): Construct a BigQuery client object. 
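# Note: with no arguments, bigquery.Client() infers the project and
# credentials from the environment; both can be set explicitly instead,
# e.g. bigquery.Client(project="your-project"), where "your-project" is a
# placeholder ID.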
# client = bigquery.Client() # TODO(developer): Set table_id to the ID of the table to create # table_id = "your-project.your_dataset.your_table_name" + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + table = bigquery.Table(table_id, schema=schema) table = client.create_table(table) # API request print( diff --git a/bigquery/samples/dataset_exists.py b/bigquery/samples/dataset_exists.py index ed09173ca89a5..46cf26a623bff 100644 --- a/bigquery/samples/dataset_exists.py +++ b/bigquery/samples/dataset_exists.py @@ -13,17 +13,17 @@ # limitations under the License. -def dataset_exists(client, to_delete): +def dataset_exists(client, dataset_id): # [START bigquery_dataset_exists] - """Determine if a dataset exists.""" - DATASET_ID = "get_table_dataset_{}".format(_millis()) - dataset_ref = client.dataset(DATASET_ID) - dataset = bigquery.Dataset(dataset_ref) - dataset = client.create_dataset(dataset) - to_delete.append(dataset) + from google.cloud.exceptions import NotFound - assert dataset_exists(client, dataset_ref) - assert not dataset_exists(client, client.dataset("i_dont_exist")) + # TODO(developer): Set dataset_id to the ID of the dataset to determine existence. + # dataset_id = "your-project.your_dataset" - # [END bigquery_dataset_exists] \ No newline at end of file + try: + client.get_dataset(dataset_id) + print("Dataset {} already exists".format(dataset_id)) + except NotFound: + print("Dataset {} is not found".format(dataset_id)) + # [END bigquery_dataset_exists] diff --git a/bigquery/samples/delete_dataset.py b/bigquery/samples/delete_dataset.py index 58851f1e21208..6cde1b6b2d27f 100644 --- a/bigquery/samples/delete_dataset.py +++ b/bigquery/samples/delete_dataset.py @@ -16,7 +16,8 @@ def delete_dataset(client, dataset_id): # [START bigquery_delete_dataset] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/delete_dataset_labels.py b/bigquery/samples/delete_dataset_labels.py new file mode 100644 index 0000000000000..33ff5c0f26201 --- /dev/null +++ b/bigquery/samples/delete_dataset_labels.py @@ -0,0 +1,36 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_dataset_labels(client, dataset_id): + + # [START bigquery_delete_label_dataset] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. 
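# Note: assigning None to a key in dataset.labels (as below) marks that
# label for deletion when update_dataset() is called with the "labels"
# field mask.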
+ # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + + # To delete a label from a dataset, set its value to None + dataset.labels["color"] = None + + dataset = client.update_dataset(dataset, ["labels"]) + print("Labels deleted from {}".format(dataset_id)) + # [END bigquery_delete_label_dataset] + return dataset diff --git a/bigquery/samples/delete_model.py b/bigquery/samples/delete_model.py index 371f9003576b5..5ac4305bc97e1 100644 --- a/bigquery/samples/delete_model.py +++ b/bigquery/samples/delete_model.py @@ -17,7 +17,8 @@ def delete_model(client, model_id): """Sample ID: go/samples-tracker/1534""" # [START bigquery_delete_model] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() @@ -26,5 +27,6 @@ def delete_model(client, model_id): # model_id = 'your-project.your_dataset.your_model' client.delete_model(model_id) + print("Deleted model '{}'.".format(model_id)) # [END bigquery_delete_model] diff --git a/bigquery/samples/delete_routine.py b/bigquery/samples/delete_routine.py index 505faa4780f3b..c0164b4150081 100644 --- a/bigquery/samples/delete_routine.py +++ b/bigquery/samples/delete_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def delete_routine(client, routine_id): + # [START bigquery_delete_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -25,6 +26,6 @@ def main(client, routine_id): # routine_id = "my-project.my_dataset.my_routine" client.delete_routine(routine_id) - # [END bigquery_delete_routine] print("Deleted routine {}.".format(routine_id)) + # [END bigquery_delete_routine] diff --git a/bigquery/samples/delete_table.py b/bigquery/samples/delete_table.py index 3eb7dc918da74..dcdd3d855b2ee 100644 --- a/bigquery/samples/delete_table.py +++ b/bigquery/samples/delete_table.py @@ -16,7 +16,8 @@ def delete_table(client, table_id): # [START bigquery_delete_table] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/get_dataset.py b/bigquery/samples/get_dataset.py index eeab2e088d2f0..5586c2b95ebb5 100644 --- a/bigquery/samples/get_dataset.py +++ b/bigquery/samples/get_dataset.py @@ -16,7 +16,8 @@ def get_dataset(client, dataset_id): # [START bigquery_get_dataset] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() @@ -52,5 +53,4 @@ def get_dataset(client, dataset_id): print("\t{}".format(table.table_id)) else: print("\tThis dataset does not contain any tables.") - # [END bigquery_get_dataset] diff --git a/bigquery/samples/get_dataset_labels.py b/bigquery/samples/get_dataset_labels.py new file mode 100644 index 0000000000000..2f21723a550b4 --- /dev/null +++ b/bigquery/samples/get_dataset_labels.py @@ -0,0 +1,38 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_dataset_labels(client, dataset_id): + + # [START bigquery_get_dataset_labels] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + + # View dataset labels + print("Dataset ID: {}".format(dataset_id)) + print("Labels:") + if dataset.labels: + for label, value in dataset.labels.items(): + print("\t{}: {}".format(label, value)) + else: + print("\tDataset has no labels defined.") + # [END bigquery_get_dataset_labels] diff --git a/bigquery/samples/get_model.py b/bigquery/samples/get_model.py index 8e43e53ec4509..69986733c50b6 100644 --- a/bigquery/samples/get_model.py +++ b/bigquery/samples/get_model.py @@ -17,7 +17,8 @@ def get_model(client, model_id): """Sample ID: go/samples-tracker/1510""" # [START bigquery_get_model] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/get_routine.py b/bigquery/samples/get_routine.py index 5850d8d064773..d9035c2824384 100644 --- a/bigquery/samples/get_routine.py +++ b/bigquery/samples/get_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def get_routine(client, routine_id): + # [START bigquery_get_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -34,6 +35,5 @@ def main(client, routine_id): for argument in routine.arguments: print(" Name: '{}'".format(argument.name)) print(" Type: '{}'".format(argument.type_)) - # [END bigquery_get_routine] return routine diff --git a/bigquery/samples/get_table.py b/bigquery/samples/get_table.py index e6a5c502e2b33..e49e032f6e235 100644 --- a/bigquery/samples/get_table.py +++ b/bigquery/samples/get_table.py @@ -16,7 +16,8 @@ def get_table(client, table_id): # [START bigquery_get_table] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/label_dataset.py b/bigquery/samples/label_dataset.py new file mode 100644 index 0000000000000..7840ea25a63f0 --- /dev/null +++ b/bigquery/samples/label_dataset.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_dataset(client, dataset_id): + + # [START bigquery_label_dataset] + # TODO(developer): Import the client library. + # from google.cloud import bigquery + + # TODO(developer): Construct a BigQuery client object. + # client = bigquery.Client() + + # TODO(developer): Set dataset_id to the ID of the dataset to fetch. + # dataset_id = "your-project.your_dataset" + + dataset = client.get_dataset(dataset_id) + dataset.labels = {"color": "green"} + dataset = client.update_dataset(dataset, ["labels"]) + + print("Labels added to {}".format(dataset_id)) + # [END bigquery_label_dataset] diff --git a/bigquery/samples/list_datasets.py b/bigquery/samples/list_datasets.py index c9ddf4f2523c2..b57aad1b5e7bd 100644 --- a/bigquery/samples/list_datasets.py +++ b/bigquery/samples/list_datasets.py @@ -16,7 +16,8 @@ def list_datasets(client): # [START bigquery_list_datasets] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/list_datasets_by_label.py b/bigquery/samples/list_datasets_by_label.py index 26eb038c59e2b..8b574b1110eb5 100644 --- a/bigquery/samples/list_datasets_by_label.py +++ b/bigquery/samples/list_datasets_by_label.py @@ -16,7 +16,8 @@ def list_datasets_by_label(client): # [START bigquery_list_datasets_by_label] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() @@ -26,9 +27,8 @@ def list_datasets_by_label(client): if datasets: print("Datasets filtered by {}:".format(label_filter)) - for dataset in datasets: # API request(s) - print("\t{}".format(dataset.dataset_id)) + for dataset in datasets: + print("\t{}.{}".format(dataset.project, dataset.dataset_id)) else: print("No datasets found with this filter.") - # [END bigquery_list_datasets_by_label] diff --git a/bigquery/samples/list_models.py b/bigquery/samples/list_models.py index cb6e4fb5569f7..5b4d21799b282 100644 --- a/bigquery/samples/list_models.py +++ b/bigquery/samples/list_models.py @@ -17,7 +17,8 @@ def list_models(client, dataset_id): """Sample ID: go/samples-tracker/1512""" # [START bigquery_list_models] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/list_routines.py b/bigquery/samples/list_routines.py index 9e90c87a3d9ce..1ae4f441cde1f 100644 --- a/bigquery/samples/list_routines.py +++ b/bigquery/samples/list_routines.py @@ -13,7 +13,7 @@ # limitations under the License. -def main(client, dataset_id): +def list_routines(client, dataset_id): # [START bigquery_list_routines] # TODO(developer): Import the client library. diff --git a/bigquery/samples/list_tables.py b/bigquery/samples/list_tables.py index 33ed408906b02..2057f2d73891c 100644 --- a/bigquery/samples/list_tables.py +++ b/bigquery/samples/list_tables.py @@ -16,7 +16,8 @@ def list_tables(client, dataset_id): # [START bigquery_list_tables] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. 
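# Note: list_tables() returns an iterator of TableListItem objects and
# pages through results automatically; the items carry only summary
# metadata, so call client.get_table() when the full schema is needed.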
# client = bigquery.Client() diff --git a/bigquery/samples/query_to_arrow.py b/bigquery/samples/query_to_arrow.py index e3ddc23f889a3..b13dcf3e1413a 100644 --- a/bigquery/samples/query_to_arrow.py +++ b/bigquery/samples/query_to_arrow.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client): +def query_to_arrow(client): + # [START bigquery_query_to_arrow] # TODO(developer): Import the client library. # from google.cloud import bigquery @@ -50,9 +51,3 @@ def main(client): print("\nSchema:\n{}".format(repr(arrow_table.schema))) # [END bigquery_query_to_arrow] return arrow_table - - -if __name__ == "__main__": - from google.cloud import bigquery - - main(bigquery.Client()) diff --git a/bigquery/samples/tests/conftest.py b/bigquery/samples/tests/conftest.py index 6156226ecb4ba..cf94835d8d716 100644 --- a/bigquery/samples/tests/conftest.py +++ b/bigquery/samples/tests/conftest.py @@ -87,6 +87,11 @@ def table_id(client, dataset_id): client.delete_table(table, not_found_ok=True) +@pytest.fixture +def table_with_data_id(client): + return "bigquery-public-data.samples.shakespeare" + + @pytest.fixture def routine_id(client, dataset_id): now = datetime.datetime.now() diff --git a/bigquery/samples/tests/test_add_empty_column.py b/bigquery/samples/tests/test_add_empty_column.py index 8268954bf84c9..e6c56e6cbfbc7 100644 --- a/bigquery/samples/tests/test_add_empty_column.py +++ b/bigquery/samples/tests/test_add_empty_column.py @@ -16,12 +16,8 @@ from .. import add_empty_column -def test_add_empty_column(capsys, client): - - schema = [ - bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ] +def test_add_empty_column(capsys, client, table_id): + add_empty_column.add_empty_column(client, table_id) out, err = capsys.readouterr() - assert \ No newline at end of file + assert "A new column has been added." in out diff --git a/bigquery/samples/tests/test_browse_table_data.py b/bigquery/samples/tests/test_browse_table_data.py index 43b7fb16fc424..f777bf91ca002 100644 --- a/bigquery/samples/tests/test_browse_table_data.py +++ b/bigquery/samples/tests/test_browse_table_data.py @@ -16,7 +16,11 @@ from .. import browse_table_data -def test_browse_table_data(capsys, client): +def test_browse_table_data(capsys, client, table_with_data_id): + browse_table_data.browse_table_data(client, table_with_data_id) out, err = capsys.readouterr() - assert \ No newline at end of file + assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out + assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out + assert "Selected 2 columns from table {}".format(table_with_data_id) in out + assert "Downloaded 10 rows from table {}".format(table_with_data_id) in out diff --git a/bigquery/samples/tests/test_dataset_exists.py b/bigquery/samples/tests/test_dataset_exists.py index de4ad29de42f5..203c4b884dc44 100644 --- a/bigquery/samples/tests/test_dataset_exists.py +++ b/bigquery/samples/tests/test_dataset_exists.py @@ -13,10 +13,18 @@ # limitations under the License. +from google.cloud import bigquery + from .. 
import dataset_exists -def test_dataset_exists(capsys, client): +def test_dataset_exists(capsys, client, random_dataset_id): + dataset_exists.dataset_exists(client, random_dataset_id) + out, err = capsys.readouterr() + assert "Dataset {} is not found".format(random_dataset_id) in out + dataset = bigquery.Dataset(random_dataset_id) + dataset = client.create_dataset(dataset) + dataset_exists.dataset_exists(client, random_dataset_id) out, err = capsys.readouterr() - assert \ No newline at end of file + assert "Dataset {} already exists".format(random_dataset_id) in out diff --git a/bigquery/samples/tests/test_dataset_label_samples.py b/bigquery/samples/tests/test_dataset_label_samples.py new file mode 100644 index 0000000000000..94a2092407b0f --- /dev/null +++ b/bigquery/samples/tests/test_dataset_label_samples.py @@ -0,0 +1,33 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import delete_dataset_labels +from .. import get_dataset_labels +from .. import label_dataset + + +def test_dataset_label_samples(capsys, client, dataset_id): + + label_dataset.label_dataset(client, dataset_id) + out, err = capsys.readouterr() + assert "Labels added to {}".format(dataset_id) in out + + get_dataset_labels.get_dataset_labels(client, dataset_id) + out, err = capsys.readouterr() + assert "color: green" in out + + dataset = delete_dataset_labels.delete_dataset_labels(client, dataset_id) + out, err = capsys.readouterr() + assert "Labels deleted from {}".format(dataset_id) in out + assert dataset.labels.get("color") is None diff --git a/bigquery/samples/tests/test_get_table.py b/bigquery/samples/tests/test_get_table.py index debf1b63a3fc7..b811ccecad1f1 100644 --- a/bigquery/samples/tests/test_get_table.py +++ b/bigquery/samples/tests/test_get_table.py @@ -17,6 +17,7 @@ def test_get_table(capsys, client, random_table_id): + schema = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), diff --git a/bigquery/samples/tests/test_list_datasets_by_label.py b/bigquery/samples/tests/test_list_datasets_by_label.py index 80fbf641d409d..346cbf1a982db 100644 --- a/bigquery/samples/tests/test_list_datasets_by_label.py +++ b/bigquery/samples/tests/test_list_datasets_by_label.py @@ -16,9 +16,11 @@ from .. 
import list_datasets_by_label -def test_list_datasets_by_label(capsys, client, dataset_id, dataset_label): +def test_list_datasets_by_label(capsys, client, dataset_id): - label_filter = "labels.color:green" + dataset = client.get_dataset(dataset_id) + dataset.labels = {"color": "green"} + dataset = client.update_dataset(dataset, ["labels"]) list_datasets_by_label.list_datasets_by_label(client) out, err = capsys.readouterr() - assert "Datasets filtered by {}:".format(label_filter) in out + assert "{}".format(dataset_id) in out diff --git a/bigquery/samples/tests/test_query_to_arrow.py b/bigquery/samples/tests/test_query_to_arrow.py index 9e36bcee346fb..f70bd49fe5654 100644 --- a/bigquery/samples/tests/test_query_to_arrow.py +++ b/bigquery/samples/tests/test_query_to_arrow.py @@ -17,9 +17,9 @@ from .. import query_to_arrow -def test_main(capsys, client): +def test_query_to_arrow(capsys, client): - arrow_table = query_to_arrow.main(client) + arrow_table = query_to_arrow.query_to_arrow(client) out, err = capsys.readouterr() assert "Downloaded 8 rows, 2 columns." in out diff --git a/bigquery/samples/tests/test_routine_samples.py b/bigquery/samples/tests/test_routine_samples.py index 5905d2e69439e..5a1c69c7f60ff 100644 --- a/bigquery/samples/tests/test_routine_samples.py +++ b/bigquery/samples/tests/test_routine_samples.py @@ -19,7 +19,7 @@ def test_create_routine(capsys, client, random_routine_id): from .. import create_routine - create_routine.main(client, random_routine_id) + create_routine.create_routine(client, random_routine_id) out, err = capsys.readouterr() assert "Created routine {}".format(random_routine_id) in out @@ -27,7 +27,7 @@ def test_create_routine(capsys, client, random_routine_id): def test_create_routine_ddl(capsys, client, random_routine_id): from .. import create_routine_ddl - create_routine_ddl.main(client, random_routine_id) + create_routine_ddl.create_routine_ddl(client, random_routine_id) routine = client.get_routine(random_routine_id) out, err = capsys.readouterr() @@ -68,7 +68,7 @@ def test_create_routine_ddl(capsys, client, random_routine_id): def test_list_routines(capsys, client, dataset_id, routine_id): from .. import list_routines - list_routines.main(client, dataset_id) + list_routines.list_routines(client, dataset_id) out, err = capsys.readouterr() assert "Routines contained in dataset {}:".format(dataset_id) in out assert routine_id in out @@ -77,7 +77,7 @@ def test_list_routines(capsys, client, dataset_id, routine_id): def test_delete_routine(capsys, client, routine_id): from .. import delete_routine - delete_routine.main(client, routine_id) + delete_routine.delete_routine(client, routine_id) out, err = capsys.readouterr() assert "Deleted routine {}.".format(routine_id) in out @@ -85,5 +85,5 @@ def test_delete_routine(capsys, client, routine_id): def test_update_routine(client, routine_id): from .. 
import update_routine - routine = update_routine.main(client, routine_id) + routine = update_routine.update_routine(client, routine_id) assert routine.body == "x * 4" diff --git a/bigquery/samples/update_dataset_default_table_expiration.py b/bigquery/samples/update_dataset_default_table_expiration.py index a5ac38c01a996..4534bb2011ebb 100644 --- a/bigquery/samples/update_dataset_default_table_expiration.py +++ b/bigquery/samples/update_dataset_default_table_expiration.py @@ -16,7 +16,8 @@ def update_dataset_default_table_expiration(client, dataset_id): # [START bigquery_update_dataset_expiration] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/update_dataset_description.py b/bigquery/samples/update_dataset_description.py index 70be80b7507e4..f3afb7fa68ce5 100644 --- a/bigquery/samples/update_dataset_description.py +++ b/bigquery/samples/update_dataset_description.py @@ -16,7 +16,8 @@ def update_dataset_description(client, dataset_id): # [START bigquery_update_dataset_description] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/update_model.py b/bigquery/samples/update_model.py index 2440066ae1ec6..5df4ada886ed5 100644 --- a/bigquery/samples/update_model.py +++ b/bigquery/samples/update_model.py @@ -17,7 +17,8 @@ def update_model(client, model_id): """Sample ID: go/samples-tracker/1533""" # [START bigquery_update_model_description] - from google.cloud import bigquery + # TODO(developer): Import the client library. + # from google.cloud import bigquery # TODO(developer): Construct a BigQuery client object. # client = bigquery.Client() diff --git a/bigquery/samples/update_routine.py b/bigquery/samples/update_routine.py index 8683e761562f9..4d491d42e1687 100644 --- a/bigquery/samples/update_routine.py +++ b/bigquery/samples/update_routine.py @@ -13,7 +13,8 @@ # limitations under the License. -def main(client, routine_id): +def update_routine(client, routine_id): + # [START bigquery_update_routine] # TODO(developer): Import the client library. # from google.cloud import bigquery
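(The diff is truncated above. As a hedged sketch only, inferred from
test_routine_samples.py, which expects the updated routine's body to be
"x * 4", the rest of update_routine.py plausibly continues along these
lines; the identifiers below are assumptions, not the original file content:)

# TODO(developer): Set routine_id to the ID of the routine to update.
# routine_id = "my-project.my_dataset.my_routine"

routine = client.get_routine(routine_id)  # fetch the current routine (assumed step)
routine.body = "x * 4"  # the test asserts this updated body
routine = client.update_routine(routine, ["body"])  # update only the body field

print("Updated routine {}.".format(routine.reference))  # assumed output line
# [END bigquery_update_routine]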