Skip to content

Commit

Permalink
Added collection of Managing Dataset samples (#6)
Browse files Browse the repository at this point in the history
* Added delete dataset function

* Added get dataset function

* Added list dataset function

* Added update dataset description sample

* Added update dataset access sample

* Added update dataset table expiration sample

* Added tests for dataset samples and updated docs

* Removing original update dataset access from snippets file.

* Moved all dataset tests into own file. Made changes based on feedback.

* Made changes based on feedback

* Removed unnecessary use of random_dataset_id in tests and removed one_day_ms fixture

* Removed unnecessary constant

* Stored the math as a constant to make it look cleaner.
  • Loading branch information
lbristol88 authored and tswast committed Apr 10, 2019
1 parent 92fd8af commit 32e43a9
Show file tree
Hide file tree
Showing 15 changed files with 389 additions and 201 deletions.
196 changes: 0 additions & 196 deletions bigquery/docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,24 +147,6 @@ def test_create_client_json_credentials():
assert client is not None


def test_list_datasets(client):
"""List datasets for a project."""
# [START bigquery_list_datasets]
# from google.cloud import bigquery
# client = bigquery.Client()

datasets = list(client.list_datasets())
project = client.project

if datasets:
print("Datasets in project {}:".format(project))
for dataset in datasets: # API request(s)
print("\t{}".format(dataset.dataset_id))
else:
print("{} project does not contain any datasets.".format(project))
# [END bigquery_list_datasets]


def test_list_datasets_by_label(client, to_delete):
dataset_id = "list_datasets_by_label_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
Expand Down Expand Up @@ -192,51 +174,6 @@ def test_list_datasets_by_label(client, to_delete):
assert dataset_id in found


def test_get_dataset_information(client, to_delete):
    """View information about a dataset."""
    # Create a labeled dataset with a known description to inspect below.
    dataset_labels = {"color": "green"}
    dataset_id = "get_dataset_{}".format(_millis())
    new_dataset = bigquery.Dataset(client.dataset(dataset_id))
    new_dataset.description = ORIGINAL_DESCRIPTION
    new_dataset.labels = dataset_labels
    new_dataset = client.create_dataset(new_dataset)  # API request
    to_delete.append(new_dataset)

    # [START bigquery_get_dataset]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    dataset_ref = client.dataset(dataset_id)
    dataset = client.get_dataset(dataset_ref)  # API request

    # View dataset properties
    print("Dataset ID: {}".format(dataset_id))
    print("Description: {}".format(dataset.description))
    print("Labels:")
    labels = dataset.labels
    if not labels:
        print("\tDataset has no labels defined.")
    else:
        for label, value in labels.items():
            print("\t{}: {}".format(label, value))

    # View tables in dataset
    print("Tables:")
    tables = list(client.list_tables(dataset_ref))  # API request(s)
    if not tables:
        print("\tThis dataset does not contain any tables.")
    else:
        for table in tables:
            print("\t{}".format(table.table_id))
    # [END bigquery_get_dataset]

    assert dataset.description == ORIGINAL_DESCRIPTION
    assert dataset.labels == dataset_labels
    assert tables == []


# [START bigquery_dataset_exists]
def dataset_exists(client, dataset_reference):
"""Return if a dataset exists.
Expand Down Expand Up @@ -274,66 +211,6 @@ def test_dataset_exists(client, to_delete):
assert not dataset_exists(client, client.dataset("i_dont_exist"))


@pytest.mark.skip(
    reason=(
        "update_dataset() is flaky "
        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588"
    )
)
def test_update_dataset_description(client, to_delete):
    """Update a dataset's description."""
    # Create a dataset whose description we will change.
    dataset_ref = client.dataset("update_dataset_description_{}".format(_millis()))
    dataset = bigquery.Dataset(dataset_ref)
    dataset.description = "Original description."
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_update_dataset_description]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')
    # dataset = client.get_dataset(dataset_ref)  # API request

    assert dataset.description == "Original description."
    dataset.description = "Updated description."

    dataset = client.update_dataset(dataset, ["description"])  # API request

    assert dataset.description == "Updated description."
    # [END bigquery_update_dataset_description]


@pytest.mark.skip(
    reason=(
        "update_dataset() is flaky "
        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588"
    )
)
def test_update_dataset_default_table_expiration(client, to_delete):
    """Update a dataset's default table expiration."""
    # Fresh dataset starts with no default table expiration.
    dataset_ref = client.dataset(
        "update_dataset_default_expiration_{}".format(_millis())
    )
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    to_delete.append(dataset)

    # [START bigquery_update_dataset_expiration]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')
    # dataset = client.get_dataset(dataset_ref)  # API request

    assert dataset.default_table_expiration_ms is None
    one_day_ms = 24 * 60 * 60 * 1000  # in milliseconds
    dataset.default_table_expiration_ms = one_day_ms

    dataset = client.update_dataset(
        dataset, ["default_table_expiration_ms"]
    )  # API request

    assert dataset.default_table_expiration_ms == one_day_ms
    # [END bigquery_update_dataset_expiration]


@pytest.mark.skip(
reason=(
"update_dataset() is flaky "
Expand Down Expand Up @@ -397,79 +274,6 @@ def test_manage_dataset_labels(client, to_delete):
# [END bigquery_delete_label_dataset]


@pytest.mark.skip(
    reason=(
        "update_dataset() is flaky "
        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5588"
    )
)
def test_update_dataset_access(client, to_delete):
    """Update a dataset's access controls."""
    # Create a dataset with the default (empty) access entries.
    dataset = client.create_dataset(
        bigquery.Dataset(
            client.dataset("update_dataset_access_{}".format(_millis()))
        )
    )
    to_delete.append(dataset)

    # [START bigquery_update_dataset_access]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset = client.get_dataset(client.dataset('my_dataset'))

    entry = bigquery.AccessEntry(
        role="READER",
        entity_type="userByEmail",
        entity_id="[email protected]",
    )
    assert entry not in dataset.access_entries
    entries = list(dataset.access_entries)
    entries.append(entry)
    dataset.access_entries = entries

    dataset = client.update_dataset(dataset, ["access_entries"])  # API request

    assert entry in dataset.access_entries
    # [END bigquery_update_dataset_access]


def test_delete_dataset(client):
    """Delete a dataset."""
    from google.cloud.exceptions import NotFound

    # One empty dataset and one that still contains a table.
    dataset1_id = "delete_dataset_{}".format(_millis())
    dataset1 = bigquery.Dataset(client.dataset(dataset1_id))
    client.create_dataset(dataset1)

    dataset2_id = "delete_dataset_with_tables{}".format(_millis())
    dataset2 = bigquery.Dataset(client.dataset(dataset2_id))
    client.create_dataset(dataset2)

    client.create_table(bigquery.Table(dataset2.table("new_table")))

    # [START bigquery_delete_dataset]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    # Delete a dataset that does not contain any tables
    # dataset1_id = 'my_empty_dataset'
    dataset1_ref = client.dataset(dataset1_id)
    client.delete_dataset(dataset1_ref)  # API request

    print("Dataset {} deleted.".format(dataset1_id))

    # Use the delete_contents parameter to delete a dataset and its contents
    # dataset2_id = 'my_dataset_with_tables'
    dataset2_ref = client.dataset(dataset2_id)
    client.delete_dataset(dataset2_ref, delete_contents=True)  # API request

    print("Dataset {} deleted.".format(dataset2_id))
    # [END bigquery_delete_dataset]

    # Both datasets must now be gone.
    for deleted in (dataset1, dataset2):
        with pytest.raises(NotFound):
            client.get_dataset(deleted)  # API request


def test_list_tables(client, to_delete):
"""List tables within a dataset."""
dataset_id = "list_tables_dataset_{}".format(_millis())
Expand Down
10 changes: 5 additions & 5 deletions bigquery/docs/usage/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Listing Datasets
List datasets for a project with the
:func:`~google.cloud.bigquery.client.Client.list_datasets` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/list_datasets.py
:language: python
:dedent: 4
:start-after: [START bigquery_list_datasets]
Expand All @@ -31,7 +31,7 @@ Getting a Dataset
Get a dataset resource (to pick up changes made by another client) with the
:func:`~google.cloud.bigquery.client.Client.get_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/get_dataset.py
:language: python
:dedent: 4
:start-after: [START bigquery_get_dataset]
Expand All @@ -55,7 +55,7 @@ Updating a Dataset
Update a property in a dataset's metadata with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/update_dataset_description.py
:language: python
:dedent: 4
:start-after: [START bigquery_update_dataset_description]
Expand All @@ -64,7 +64,7 @@ Update a property in a dataset's metadata with the
Modify user permissions on a dataset with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/update_dataset_access.py
:language: python
:dedent: 4
:start-after: [START bigquery_update_dataset_access]
Expand All @@ -76,7 +76,7 @@ Deleting a Dataset
Delete a dataset with the
:func:`~google.cloud.bigquery.client.Client.delete_dataset` method:

.. literalinclude:: ../snippets.py
.. literalinclude:: ../samples/delete_dataset.py
:language: python
:dedent: 4
:start-after: [START bigquery_delete_dataset]
Expand Down
32 changes: 32 additions & 0 deletions bigquery/samples/delete_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def delete_dataset(client, dataset_id):
    """Delete a BigQuery dataset, including any tables it contains.

    Args:
        client: A ``google.cloud.bigquery.Client`` instance.
        dataset_id: Full dataset ID, e.g. ``'your-project.your_dataset'``.
    """

    # [START bigquery_delete_dataset]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to delete.
    # dataset_id = 'your-project.your_dataset'

    # Use the delete_contents parameter to delete a dataset and its contents
    # Use the not_found_ok parameter to not receive an error if the dataset has already been deleted.
    client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
    # [END bigquery_delete_dataset]

    print("Deleted dataset '{}'.".format(dataset_id))
56 changes: 56 additions & 0 deletions bigquery/samples/get_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def get_dataset(client, dataset_id):
    """Fetch a dataset and print its properties and tables."""

    # [START bigquery_get_dataset]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    # TODO(developer): Set dataset_id to the ID of the dataset to fetch.
    # dataset_id = 'your-project.your_dataset'

    dataset = client.get_dataset(dataset_id)

    full_dataset_id = "{}.{}".format(dataset.project, dataset.dataset_id)
    print(
        "Got dataset '{}' with friendly_name '{}'.".format(
            full_dataset_id, dataset.friendly_name
        )
    )

    # View dataset properties
    print("Description: {}".format(dataset.description))
    print("Labels:")
    labels = dataset.labels
    if not labels:
        print("\tDataset has no labels defined.")
    else:
        for label, value in labels.items():
            print("\t{}: {}".format(label, value))

    # View tables in dataset
    print("Tables:")
    tables = list(client.list_tables(dataset))  # API request(s)
    if not tables:
        print("\tThis dataset does not contain any tables.")
    else:
        for table in tables:
            print("\t{}".format(table.table_id))

    # [END bigquery_get_dataset]
33 changes: 33 additions & 0 deletions bigquery/samples/list_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def list_datasets(client):
    """Print the ID of every dataset in the client's project."""

    # [START bigquery_list_datasets]
    from google.cloud import bigquery

    # TODO(developer): Construct a BigQuery client object.
    # client = bigquery.Client()

    project = client.project
    datasets = list(client.list_datasets())

    if not datasets:
        print("{} project does not contain any datasets.".format(project))
    else:
        print("Datasets in project {}:".format(project))
        for dataset in datasets:  # API request(s)
            print("\t{}".format(dataset.dataset_id))
    # [END bigquery_list_datasets]
Loading

0 comments on commit 32e43a9

Please sign in to comment.