Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added collection of Managing Dataset samples #6

Merged
merged 13 commits into from
Apr 8, 2019
Merged
102 changes: 0 additions & 102 deletions bigquery/docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,24 +147,6 @@ def test_create_client_json_credentials():
assert client is not None


def test_list_datasets(client):
    """List datasets for a project."""
    # [START bigquery_list_datasets]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    project = client.project
    datasets = list(client.list_datasets())

    if not datasets:
        print("{} project does not contain any datasets.".format(project))
    else:
        print("Datasets in project {}:".format(project))
        for dataset in datasets:  # API request(s)
            print("\t{}".format(dataset.dataset_id))
    # [END bigquery_list_datasets]


def test_list_datasets_by_label(client, to_delete):
dataset_id = "list_datasets_by_label_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
Expand Down Expand Up @@ -192,51 +174,6 @@ def test_list_datasets_by_label(client, to_delete):
assert dataset_id in found


def test_get_dataset_information(client, to_delete):
    """View information about a dataset."""
    dataset_id = "get_dataset_{}".format(_millis())
    dataset_labels = {"color": "green"}
    dataset_ref = client.dataset(dataset_id)
    new_dataset = bigquery.Dataset(dataset_ref)
    new_dataset.description = ORIGINAL_DESCRIPTION
    new_dataset.labels = dataset_labels
    dataset = client.create_dataset(new_dataset)  # API request
    to_delete.append(dataset)

    # [START bigquery_get_dataset]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    dataset = client.get_dataset(dataset_ref)  # API request

    # View dataset properties
    print("Dataset ID: {}".format(dataset_id))
    print("Description: {}".format(dataset.description))
    print("Labels:")
    labels = dataset.labels
    if not labels:
        print("\tDataset has no labels defined.")
    else:
        for label, value in labels.items():
            print("\t{}: {}".format(label, value))

    # View tables in dataset
    print("Tables:")
    tables = list(client.list_tables(dataset_ref))  # API request(s)
    if not tables:
        print("\tThis dataset does not contain any tables.")
    else:
        for table in tables:
            print("\t{}".format(table.table_id))
    # [END bigquery_get_dataset]

    assert dataset.description == ORIGINAL_DESCRIPTION
    assert dataset.labels == dataset_labels
    assert tables == []


# [START bigquery_dataset_exists]
def dataset_exists(client, dataset_reference):
"""Return if a dataset exists.
Expand Down Expand Up @@ -431,45 +368,6 @@ def test_update_dataset_access(client, to_delete):
# [END bigquery_update_dataset_access]


def test_delete_dataset(client):
    """Delete a dataset."""
    from google.cloud.exceptions import NotFound

    # Fixture 1: an empty dataset.
    dataset1_id = "delete_dataset_{}".format(_millis())
    dataset1 = bigquery.Dataset(client.dataset(dataset1_id))
    client.create_dataset(dataset1)

    # Fixture 2: a dataset that contains a table.
    dataset2_id = "delete_dataset_with_tables{}".format(_millis())
    dataset2 = bigquery.Dataset(client.dataset(dataset2_id))
    client.create_dataset(dataset2)
    client.create_table(bigquery.Table(dataset2.table("new_table")))

    # [START bigquery_delete_dataset]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Delete a dataset that does not contain any tables
    # dataset1_id = 'my_empty_dataset'
    dataset1_ref = client.dataset(dataset1_id)
    client.delete_dataset(dataset1_ref)  # API request

    print("Dataset {} deleted.".format(dataset1_id))

    # Use the delete_contents parameter to delete a dataset and its contents
    # dataset2_id = 'my_dataset_with_tables'
    dataset2_ref = client.dataset(dataset2_id)
    client.delete_dataset(dataset2_ref, delete_contents=True)  # API request

    print("Dataset {} deleted.".format(dataset2_id))
    # [END bigquery_delete_dataset]

    # Both datasets must now be gone.
    for doomed in (dataset1, dataset2):
        with pytest.raises(NotFound):
            client.get_dataset(doomed)  # API request


def test_list_tables(client, to_delete):
"""List tables within a dataset."""
dataset_id = "list_tables_dataset_{}".format(_millis())
Expand Down
10 changes: 5 additions & 5 deletions bigquery/docs/usage/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Listing Datasets
List datasets for a project with the
:func:`~google.cloud.bigquery.client.Client.list_datasets` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/list_datasets.py
:language: python
:dedent: 4
:start-after: [START bigquery_list_datasets]
Expand All @@ -31,7 +31,7 @@ Getting a Dataset
Get a dataset resource (to pick up changes made by another client) with the
:func:`~google.cloud.bigquery.client.Client.get_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/get_dataset.py
:language: python
:dedent: 4
:start-after: [START bigquery_get_dataset]
Expand All @@ -55,7 +55,7 @@ Updating a Dataset
Update a property in a dataset's metadata with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/update_dataset_description.py
:language: python
:dedent: 4
:start-after: [START bigquery_update_dataset_description]
Expand All @@ -64,7 +64,7 @@ Update a property in a dataset's metadata with the
Modify user permissions on a dataset with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/update_dataset_access.py
:language: python
:dedent: 4
:start-after: [START bigquery_update_dataset_access]
Expand All @@ -76,7 +76,7 @@ Deleting a Dataset
Delete a dataset with the
:func:`~google.cloud.bigquery.client.Client.delete_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/delete_dataset.py
:language: python
:dedent: 4
:start-after: [START bigquery_delete_dataset]
Expand Down
30 changes: 30 additions & 0 deletions bigquery/samples/delete_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def delete_dataset(client, dataset_id):
    """Delete the dataset identified by ``dataset_id``.

    Args:
        client: an initialized ``google.cloud.bigquery.Client``.
        dataset_id: full dataset ID, e.g. ``'your-project.your_dataset'``.
    """

    # [START bigquery_delete_dataset]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to delete.
    # dataset_id = 'your-project.your_dataset'

    # Use the delete_contents parameter to delete a dataset and its contents.
    # Use the not_found_ok parameter to not receive an error if the dataset
    # has already been deleted.
    client.delete_dataset(
        dataset_id, delete_contents=True, not_found_ok=True
    )  # API request

    print("Deleted dataset '{}'.".format(dataset_id))
    # [END bigquery_delete_dataset]
57 changes: 57 additions & 0 deletions bigquery/samples/get_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def get_dataset(client, dataset_id):
    """Fetch a dataset and print its properties, labels, and tables.

    Args:
        client: an initialized ``google.cloud.bigquery.Client``.
        dataset_id: full dataset ID, e.g. ``'your-project.your_dataset'``.
    """

    # [START bigquery_get_dataset]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # API request

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    friendly_name = dataset.friendly_name
    print(
        "Got dataset '{}' with friendly_name '{}'.".format(
            full_dataset_id, friendly_name
        )
    )

    # View dataset properties.
    print("Description: {}".format(dataset.description))
    print("Labels:")
    labels = dataset.labels
    if labels:
        for label, value in labels.items():
            print("\t{}: {}".format(label, value))
    else:
        print("\tDataset has no labels defined.")

    # View tables in dataset.
    print("Tables:")
    tables = list(client.list_tables(dataset))  # API request(s)
    if tables:
        for table in tables:
            print("\t{}".format(table.table_id))
    else:
        print("\tThis dataset does not contain any tables.")
    # [END bigquery_get_dataset]
33 changes: 33 additions & 0 deletions bigquery/samples/list_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def list_datasets(client):
    """Print every dataset in the client's project."""

    # [START bigquery_list_datasets]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    project = client.project
    datasets = list(client.list_datasets())

    if not datasets:
        print("{} project does not contain any datasets.".format(project))
    else:
        print("Datasets in project {}:".format(project))
        for dataset in datasets:  # API request(s)
            print("\t{}".format(dataset.dataset_id))
    # [END bigquery_list_datasets]
44 changes: 43 additions & 1 deletion bigquery/samples/tests/test_dataset_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,52 @@
# limitations under the License.

from .. import create_dataset
from .. import delete_dataset
from .. import get_dataset
from .. import list_datasets
from .. import update_dataset_description
from .. import update_dataset_default_table_expiration
from .. import update_dataset_access


def test_create_dataset(capsys, client, random_dataset_id):
def test_dataset_samples(capsys, client, random_dataset_id):
    """Exercise each dataset sample end-to-end against a live project.

    Runs create -> get -> list -> update (description, expiration, access)
    -> delete in order, checking each sample's printed output via capsys.
    """

    # Create dataset.
    create_dataset.create_dataset(client, random_dataset_id)
    out, err = capsys.readouterr()
    assert "Created dataset {}".format(random_dataset_id) in out

    # Get dataset.
    get_dataset.get_dataset(client, random_dataset_id)
    out, err = capsys.readouterr()
    assert random_dataset_id in out

    # List datasets.
    list_datasets.list_datasets(client)
    out, err = capsys.readouterr()
    assert "Datasets in project {}:".format(client.project) in out

    # Update dataset description.
    update_dataset_description.update_dataset_description(client, random_dataset_id)
    out, err = capsys.readouterr()
    assert "Updated description." in out

    # Update dataset default table expiration.
    update_dataset_default_table_expiration.update_dataset_default_table_expiration(
        client, random_dataset_id
    )
    out, err = capsys.readouterr()
    assert "Updated dataset {}".format(random_dataset_id) in out

    # Update dataset permissions.
    update_dataset_access.update_dataset_access(client, random_dataset_id)
    out, err = capsys.readouterr()
    assert (
        "Updated dataset '{}' with modified user permissions.".format(random_dataset_id)
        in out
    )

    # Delete dataset.
    delete_dataset.delete_dataset(client, random_dataset_id)
    out, err = capsys.readouterr()
    assert "Deleted dataset '{}'.".format(random_dataset_id) in out
45 changes: 45 additions & 0 deletions bigquery/samples/update_dataset_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def update_dataset_access(client, dataset_id):
    """Grant an additional user READER access to a dataset.

    Args:
        client: an initialized ``google.cloud.bigquery.Client``.
        dataset_id: full dataset ID, e.g. ``'your-project.your_dataset'``.
    """

    # [START bigquery_update_dataset_access]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)  # API request

    # NOTE(review): the sample email address was redacted in this source
    # ("[email protected]") — restore the real sample address before publishing.
    entry = bigquery.AccessEntry(
        role="READER",
        entity_type="userByEmail",
        entity_id="[email protected]",
    )

    # access_entries is returned as an immutable view; copy, extend, reassign.
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ["access_entries"])  # API request

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print(
        "Updated dataset '{}' with modified user permissions.".format(full_dataset_id)
    )
    # [END bigquery_update_dataset_access]
Loading