Skip to content

Commit

Permalink
BigQuery: deprecate client.dataset() part 1 (googleapis#9032)
Browse files Browse the repository at this point in the history
Deprecate `client.dataset()` part 1
  • Loading branch information
emar-kar committed Sep 18, 2019
1 parent 664fa87 commit eb18dee
Show file tree
Hide file tree
Showing 40 changed files with 507 additions and 212 deletions.
280 changes: 187 additions & 93 deletions bigquery/docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,27 +87,199 @@ def delete(self):
self._wrapped.close()


def dataset_exists(client, dataset_reference):
"""Return if a dataset exists.
def test_create_client_default_credentials():
"""Create a BigQuery client with Application Default Credentials"""

Args:
client (google.cloud.bigquery.client.Client):
A client to connect to the BigQuery API.
dataset_reference (google.cloud.bigquery.dataset.DatasetReference):
A reference to the dataset to look for.
# [START bigquery_client_default_credentials]
from google.cloud import bigquery

Returns:
bool: ``True`` if the dataset exists, ``False`` otherwise.
# If you don't specify credentials when constructing the client, the
# client library will look for credentials in the environment.
client = bigquery.Client()
# [END bigquery_client_default_credentials]

assert client is not None


def test_create_table_nested_repeated_schema(client, to_delete):
    """Create a table whose schema contains a REPEATED RECORD column.

    A dataset with a ``_millis()``-suffixed name is created first and
    appended to ``to_delete``; the table ``my_table`` is then created in it
    with four scalar columns plus a repeated ``addresses`` record.

    Args:
        client: Client used for the dataset and table API requests.
        to_delete: List that receives the created dataset.
    """
    dataset_id = "create_table_nested_repeated_{}".format(_millis())
    dataset_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(dataset_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_nested_repeated_schema]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')

    # Sub-fields of each entry in the repeated "addresses" record.
    address_fields = [
        bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
    ]
    schema = [
        bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
        bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
        bigquery.SchemaField(
            "addresses", "RECORD", mode="REPEATED", fields=address_fields
        ),
    ]

    table_ref = dataset_ref.table("my_table")
    table = client.create_table(
        bigquery.Table(table_ref, schema=schema)
    )  # API request

    print("Created table {}".format(table.full_table_id))
    # [END bigquery_nested_repeated_schema]


def test_create_table_cmek(client, to_delete):
    """Create a table with a customer-managed encryption key configured.

    A dataset with a ``_millis()``-suffixed name is created and appended to
    ``to_delete``. The table ``my_table`` is then created with an
    ``EncryptionConfiguration``, and the key name on the created table is
    asserted to match the one that was set.

    Args:
        client: Client used for the dataset and table API requests.
        to_delete: List that receives the created dataset.
    """
    dataset_id = "create_table_cmek_{}".format(_millis())
    scratch_ref = client.dataset(dataset_id)
    dataset = bigquery.Dataset(scratch_ref)
    client.create_dataset(dataset)
    to_delete.append(dataset)

    # [START bigquery_create_table_cmek]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'

    table = bigquery.Table(client.dataset(dataset_id).table("my_table"))

    # Set the encryption key to use for the table.
    # TODO: Replace this key with a key you have created in Cloud KMS.
    key_parts = ("cloud-samples-tests", "us-central1", "test", "test")
    kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
        *key_parts
    )
    encryption = bigquery.EncryptionConfiguration(kms_key_name=kms_key_name)
    table.encryption_configuration = encryption

    created = client.create_table(table)  # API request

    assert created.encryption_configuration.kms_key_name == kms_key_name
    # [END bigquery_create_table_cmek]


def test_create_partitioned_table(client, to_delete):
    """Create a table partitioned by day on its ``date`` column.

    A dataset with a ``_millis()``-suffixed name is created and appended to
    ``to_delete``. The table ``my_partitioned_table`` is then created with
    DAY time partitioning on ``date`` and a partition expiration of
    7776000000 ms, and the resulting partitioning settings are asserted.

    Args:
        client: Client used for the dataset and table API requests.
        to_delete: List that receives the created dataset.
    """
    dataset_id = "create_table_partitioned_{}".format(_millis())
    # Keep ``dataset_ref`` a real dataset reference, as the sample's
    # commented-out setup below documents, rather than a Dataset resource.
    dataset_ref = client.dataset(dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    to_delete.append(dataset)

    # [START bigquery_create_table_partitioned]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_ref = client.dataset('my_dataset')

    table_ref = dataset_ref.table("my_partitioned_table")
    schema = [
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("post_abbr", "STRING"),
        bigquery.SchemaField("date", "DATE"),
    ]
    table = bigquery.Table(table_ref, schema=schema)
    table.time_partitioning = bigquery.TimePartitioning(
        type_=bigquery.TimePartitioningType.DAY,
        field="date",  # name of column to use for partitioning
        expiration_ms=7776000000,  # 90 days
    )

    table = client.create_table(table)

    print(
        "Created table {}, partitioned on column {}".format(
            table.table_id, table.time_partitioning.field
        )
    )
    # [END bigquery_create_table_partitioned]

    assert table.time_partitioning.type_ == "DAY"
    assert table.time_partitioning.field == "date"
    assert table.time_partitioning.expiration_ms == 7776000000


def test_load_and_query_partitioned_table(client, to_delete):
dataset_id = "load_partitioned_table_dataset_{}".format(_millis())
dataset = bigquery.Dataset(client.dataset(dataset_id))
client.create_dataset(dataset)
to_delete.append(dataset)

# [START bigquery_load_table_partitioned]
# from google.cloud import bigquery
# client = bigquery.Client()
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

dataset_ref = client.dataset(dataset_id)
job_config = bigquery.LoadJobConfig()
job_config.schema = [
bigquery.SchemaField("name", "STRING"),
bigquery.SchemaField("post_abbr", "STRING"),
bigquery.SchemaField("date", "DATE"),
]
job_config.skip_leading_rows = 1
job_config.time_partitioning = bigquery.TimePartitioning(
type_=bigquery.TimePartitioningType.DAY,
field="date", # name of column to use for partitioning
expiration_ms=7776000000,
) # 90 days
uri = "gs://cloud-samples-data/bigquery/us-states/us-states-by-date.csv"

load_job = client.load_table_from_uri(
uri, dataset_ref.table(table_id), job_config=job_config
) # API request

assert load_job.job_type == "load"

load_job.result() # Waits for table load to complete.

table = client.get_table(dataset_ref.table(table_id))
print("Loaded {} rows to table {}".format(table.num_rows, table_id))
# [END bigquery_load_table_partitioned]
assert table.num_rows == 50

project_id = client.project

# [START bigquery_query_partitioned_table]
import datetime

# from google.cloud import bigquery
# client = bigquery.Client()
# project_id = 'my-project'
# dataset_id = 'my_dataset'
table_id = "us_states_by_date"

sql_template = """
SELECT *
FROM `{}.{}.{}`
WHERE date BETWEEN @start_date AND @end_date
"""
from google.cloud.exceptions import NotFound
sql = sql_template.format(project_id, dataset_id, table_id)
job_config = bigquery.QueryJobConfig()
job_config.query_parameters = [
bigquery.ScalarQueryParameter("start_date", "DATE", datetime.date(1800, 1, 1)),
bigquery.ScalarQueryParameter("end_date", "DATE", datetime.date(1899, 12, 31)),
]

try:
client.get_dataset(dataset_reference)
return True
except NotFound:
return False
# API request
query_job = client.query(sql, job_config=job_config)

rows = list(query_job)
print("{} states were admitted to the US in the 1800s".format(len(rows)))
# [END bigquery_query_partitioned_table]
assert len(rows) == 29


# [START bigquery_table_exists]
def table_exists(client, table_reference):
"""Return if a table exists.
Expand Down Expand Up @@ -289,43 +461,6 @@ def test_update_table_expiration(client, to_delete):
# [END bigquery_update_table_expiration]


@pytest.mark.skip(
    reason=(
        "update_table() is flaky "
        "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
    )
)
def test_add_empty_column(client, to_delete):
    """Append a ``phone`` STRING column to the schema of an existing table.

    A dataset and a table (using the module-level ``SCHEMA``) are created
    first; the dataset is appended to ``to_delete``. The sample then fetches
    the table, extends a copy of its schema by one field and updates the
    table's ``schema`` property.

    Args:
        client: Client used for the dataset and table API requests.
        to_delete: List that receives the created dataset.
    """
    dataset_id = "add_empty_column_dataset_{}".format(_millis())
    table_id = "add_empty_column_table_{}".format(_millis())
    dataset = client.create_dataset(bigquery.Dataset(client.dataset(dataset_id)))
    to_delete.append(dataset)

    client.create_table(bigquery.Table(dataset.table(table_id), schema=SCHEMA))

    # [START bigquery_add_empty_column]
    # from google.cloud import bigquery
    # client = bigquery.Client()
    # dataset_id = 'my_dataset'
    # table_id = 'my_table'

    table_ref = client.dataset(dataset_id).table(table_id)
    table = client.get_table(table_ref)  # API request

    original_schema = table.schema
    new_schema = list(original_schema)  # creates a copy of the schema
    new_schema.append(bigquery.SchemaField("phone", "STRING"))

    table.schema = new_schema
    table = client.update_table(table, ["schema"])  # API request

    assert len(table.schema) == len(original_schema) + 1 == len(new_schema)
    # [END bigquery_add_empty_column]


@pytest.mark.skip(
reason=(
"update_table() is flaky "
Expand Down Expand Up @@ -414,47 +549,6 @@ def test_update_table_cmek(client, to_delete):
# [END bigquery_update_table_cmek]


def test_browse_table_data(client, to_delete, capsys):
    """Retrieve selected row data from a table.

    Lists rows of ``bigquery-public-data.samples.shakespeare`` four ways
    (all rows, first 10, first 10 restricted to two columns, and 10 rows
    from start index 10), prints the last listing as a table, and asserts
    that 11 non-blank lines were captured.

    Args:
        client: Client used for the table and row-listing API requests.
        to_delete: Not used by this test.
        capsys: Capture fixture used to read back the printed output.
    """

    # [START bigquery_browse_table]
    # from google.cloud import bigquery
    # client = bigquery.Client()

    public_samples = client.dataset("samples", project="bigquery-public-data")
    table = client.get_table(public_samples.table("shakespeare"))  # API call

    # Load all rows from a table
    every_row = list(client.list_rows(table))
    assert len(every_row) == table.num_rows

    # Load the first 10 rows
    first_ten = list(client.list_rows(table, max_results=10))
    assert len(first_ten) == 10

    # Specify selected fields to limit the results to certain columns
    fields = table.schema[:2]  # first two columns
    rows = client.list_rows(table, selected_fields=fields, max_results=10)
    assert len(rows.schema) == 2
    assert len(list(rows)) == 10

    # Use the start index to load an arbitrary portion of the table
    rows = client.list_rows(table, start_index=10, max_results=10)

    # Print row data in tabular format
    format_string = "{!s:<16} " * len(rows.schema)
    header = format_string.format(*[field.name for field in rows.schema])
    print(header)  # prints column headers
    for row in rows:
        print(format_string.format(*row))  # prints row data
    # [END bigquery_browse_table]

    stdout, _ = capsys.readouterr()
    non_blank = [line for line in stdout.split("\n") if line]
    assert len(non_blank) == 11


@pytest.mark.skip(
reason=(
"update_table() is flaky "
Expand Down
41 changes: 40 additions & 1 deletion bigquery/docs/usage/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ List datasets for a project with the
List datasets by label for a project with the
:func:`~google.cloud.bigquery.client.Client.list_datasets` method:

.. literalinclude:: ../samples/samples/list_datasets_by_label.py
.. literalinclude:: ../samples/list_datasets_by_label.py
:language: python
:dedent: 4
:start-after: [START bigquery_list_datasets_by_label]
Expand All @@ -46,6 +46,15 @@ Get a dataset resource (to pick up changes made by another client) with the
:start-after: [START bigquery_get_dataset]
:end-before: [END bigquery_get_dataset]

Determine if a dataset exists with the
:func:`~google.cloud.bigquery.client.Client.get_dataset` method:

.. literalinclude:: ../samples/dataset_exists.py
:language: python
:dedent: 4
:start-after: [START bigquery_dataset_exists]
:end-before: [END bigquery_dataset_exists]

Creating a Dataset
^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -79,6 +88,36 @@ Modify user permissions on a dataset with the
:start-after: [START bigquery_update_dataset_access]
:end-before: [END bigquery_update_dataset_access]

Manage Dataset labels
^^^^^^^^^^^^^^^^^^^^^

Add labels to a dataset with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../samples/label_dataset.py
:language: python
:dedent: 4
:start-after: [START bigquery_label_dataset]
:end-before: [END bigquery_label_dataset]

Get a dataset's labels with the
:func:`~google.cloud.bigquery.client.Client.get_dataset` method:

.. literalinclude:: ../samples/get_dataset_labels.py
:language: python
:dedent: 4
:start-after: [START bigquery_get_dataset_labels]
:end-before: [END bigquery_get_dataset_labels]

Delete a dataset's labels with the
:func:`~google.cloud.bigquery.client.Client.update_dataset` method:

.. literalinclude:: ../samples/delete_dataset_labels.py
:language: python
:dedent: 4
:start-after: [START bigquery_delete_label_dataset]
:end-before: [END bigquery_delete_label_dataset]

Deleting a Dataset
^^^^^^^^^^^^^^^^^^

Expand Down
9 changes: 9 additions & 0 deletions bigquery/docs/usage/tables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,15 @@ Insert rows into a table's data with the
:start-after: [START bigquery_table_insert_rows]
:end-before: [END bigquery_table_insert_rows]

Add an empty column to an existing table with the
:func:`~google.cloud.bigquery.client.Client.update_table` method:

.. literalinclude:: ../samples/add_empty_column.py
:language: python
:dedent: 4
:start-after: [START bigquery_add_empty_column]
:end-before: [END bigquery_add_empty_column]

Copying a Table
^^^^^^^^^^^^^^^

Expand Down
Loading

0 comments on commit eb18dee

Please sign in to comment.