From 17d757a76a1bf630fffaf774d0491919c68a6d19 Mon Sep 17 00:00:00 2001 From: Jon Wayne Parrott Date: Mon, 12 Sep 2016 12:59:26 -0700 Subject: [PATCH] Add bigquery create and copy table examples [(#514)](https://github.com/GoogleCloudPlatform/python-docs-samples/issues/514) * Add bigquery create table sample * Add copy table sample * Fix test table ids --- samples/snippets/export_data_to_gcs_test.py | 2 +- samples/snippets/snippets.py | 88 +++++++++++++++++++++ samples/snippets/snippets_test.py | 39 ++++++++- 3 files changed, 126 insertions(+), 3 deletions(-) diff --git a/samples/snippets/export_data_to_gcs_test.py b/samples/snippets/export_data_to_gcs_test.py index e260e47b4..acbbe50e5 100644 --- a/samples/snippets/export_data_to_gcs_test.py +++ b/samples/snippets/export_data_to_gcs_test.py @@ -15,7 +15,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 'test_table' def test_export_data_to_gcs(cloud_config, capsys): diff --git a/samples/snippets/snippets.py b/samples/snippets/snippets.py index 49272965b..d201a84cb 100644 --- a/samples/snippets/snippets.py +++ b/samples/snippets/snippets.py @@ -25,8 +25,11 @@ """ import argparse +import time +import uuid from gcloud import bigquery +import gcloud.bigquery.job def list_projects(): @@ -82,6 +85,32 @@ def list_tables(dataset_name, project=None): print(table.name) +def create_table(dataset_name, table_name, project=None): + """Creates a simple table in the given dataset. + + If no project is specified, then the currently active project is used. 
+ """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + + if not dataset.exists(): + print('Dataset {} does not exist.'.format(dataset_name)) + return + + table = dataset.table(table_name) + + # Set the table schema + table.schema = ( + bigquery.SchemaField('Name', 'STRING'), + bigquery.SchemaField('Age', 'INTEGER'), + bigquery.SchemaField('Weight', 'FLOAT'), + ) + + table.create() + + print('Created table {} in dataset {}.'.format(table_name, dataset_name)) + + def list_rows(dataset_name, table_name, project=None): """Prints rows in the given table. @@ -126,6 +155,50 @@ def list_rows(dataset_name, table_name, project=None): print(format_string.format(*row)) +def copy_table(dataset_name, table_name, new_table_name, project=None): + """Copies a table. + + If no project is specified, then the currently active project is used. + """ + bigquery_client = bigquery.Client(project=project) + dataset = bigquery_client.dataset(dataset_name) + table = dataset.table(table_name) + + # This sample shows the destination table in the same dataset and project, + # however, it's possible to copy across datasets and projects. You can + # also copy multiple source tables into a single destination table by + # providing additional arguments to `copy_table`. + destination_table = dataset.table(new_table_name) + + # Create a job to copy the table to the destination table. + job_id = str(uuid.uuid4()) + job = bigquery_client.copy_table( + job_id, destination_table, table) + + # Create the table if it doesn't exist. + job.create_disposition = ( + gcloud.bigquery.job.CreateDisposition.CREATE_IF_NEEDED) + + # Start the job. + job.begin() + + # Wait for the job to finish. + print('Waiting for job to finish...') + wait_for_job(job) + + print('Table {} copied to {}.'.format(table_name, new_table_name)) + + +def wait_for_job(job): + while True: + job.reload() # Refreshes the state via a GET request. 
+ if job.state == 'DONE': + if job.error_result: + raise RuntimeError(job.error_result) + return + time.sleep(1) + + def delete_table(dataset_name, table_name, project=None): """Deletes a table in a given dataset. @@ -155,11 +228,22 @@ def delete_table(dataset_name, table_name, project=None): 'list-tables', help=list_tables.__doc__) list_tables_parser.add_argument('dataset_name') + create_table_parser = subparsers.add_parser( + 'create-table', help=create_table.__doc__) + create_table_parser.add_argument('dataset_name') + create_table_parser.add_argument('table_name') + list_rows_parser = subparsers.add_parser( 'list-rows', help=list_rows.__doc__) list_rows_parser.add_argument('dataset_name') list_rows_parser.add_argument('table_name') + copy_table_parser = subparsers.add_parser( + 'copy-table', help=copy_table.__doc__) + copy_table_parser.add_argument('dataset_name') + copy_table_parser.add_argument('table_name') + copy_table_parser.add_argument('new_table_name') + delete_table_parser = subparsers.add_parser( 'delete-table', help=delete_table.__doc__) delete_table_parser.add_argument('dataset_name') @@ -171,7 +255,11 @@ def delete_table(dataset_name, table_name, project=None): list_datasets(args.project) elif args.command == 'list-tables': list_tables(args.dataset_name, args.project) + elif args.command == 'create-table': + create_table(args.dataset_name, args.table_name, args.project) elif args.command == 'list-rows': list_rows(args.dataset_name, args.table_name, args.project) + elif args.command == 'copy-table': + copy_table(args.dataset_name, args.table_name, args.new_table_name) elif args.command == 'delete-table': delete_table(args.dataset_name, args.table_name, args.project) diff --git a/samples/snippets/snippets_test.py b/samples/snippets/snippets_test.py index ed2f47639..372cbc834 100644 --- a/samples/snippets/snippets_test.py +++ b/samples/snippets/snippets_test.py @@ -19,7 +19,7 @@ DATASET_ID = 'test_dataset' -TABLE_ID = 'test_import_table' +TABLE_ID = 
'test_table' @pytest.mark.xfail( @@ -62,7 +62,42 @@ def test_list_rows(capsys): assert 'Age' in out -def test_delete_table(capsys): +@pytest.fixture +def temporary_table(): + """Fixture that returns a factory for tables that do not yet exist and + will be automatically deleted after the test.""" + bigquery_client = bigquery.Client() + dataset = bigquery_client.dataset(DATASET_ID) + tables = [] + + def factory(table_name): + new_table = dataset.table(table_name) + if new_table.exists(): + new_table.delete() + tables.append(new_table) + return new_table + + yield factory + + for table in tables: + if table.exists(): + table.delete() + + +def test_create_table(temporary_table): + new_table = temporary_table('test_create_table') + snippets.create_table(DATASET_ID, new_table.name) + assert new_table.exists() + + +@pytest.mark.slow +def test_copy_table(temporary_table): + new_table = temporary_table('test_copy_table') + snippets.copy_table(DATASET_ID, TABLE_ID, new_table.name) + assert new_table.exists() + + +def test_delete_table(): # Create a table to delete bigquery_client = bigquery.Client() dataset = bigquery_client.dataset(DATASET_ID)