Skip to content

Commit

Permalink
Simplify BigQuery samples according to our standard.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmdobry committed Sep 6, 2016
1 parent a4de29a commit aaa4f77
Show file tree
Hide file tree
Showing 9 changed files with 464 additions and 703 deletions.
94 changes: 30 additions & 64 deletions bigquery/datasets.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,79 +13,50 @@

'use strict';

// [START all]
// [START setup]
// By default, the client will authenticate using the service account file
// specified by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use
// the project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/guides/authentication
// https://googlecloudplatform.github.io/google-cloud-node/#/docs/google-cloud/latest/guides/authentication
var BigQuery = require('@google-cloud/bigquery');

// Instantiate the bigquery client
var bigquery = BigQuery();
// [END setup]

// Control-flow helper library
var async = require('async');
function createDataset (datasetId, callback) {
var bigquery = BigQuery();
var dataset = bigquery.dataset(datasetId);

// [START create_dataset]
/**
* List datasets in the authenticated project.
*
* @param {string} name The name for the new dataset.
* @param {function} callback The callback function.
*/
function createDataset (name, callback) {
var dataset = bigquery.dataset(name);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery
dataset.create(function (err, dataset) {
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=create
dataset.create(function (err, dataset, apiResponse) {
if (err) {
return callback(err);
}

console.log('Created dataset: %s', name);
return callback(null, dataset);
console.log('Created dataset: %s', datasetId);
return callback(null, dataset, apiResponse);
});
}
// [END create_dataset]

// [START delete_dataset]
/**
* List datasets in the authenticated project.
*
* @param {string} name The name for the new dataset.
* @param {function} callback The callback function.
*/
function deleteDataset (name, callback) {
var dataset = bigquery.dataset(name);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery

function deleteDataset (datasetId, callback) {
var bigquery = BigQuery();
var dataset = bigquery.dataset(datasetId);

// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=delete
dataset.delete(function (err) {
if (err) {
return callback(err);
}

console.log('Deleted dataset: %s', name);
console.log('Deleted dataset: %s', datasetId);
return callback(null);
});
}
// [END delete_dataset]

// [START list_datasets]
/**
* List datasets in the authenticated project.
*
* @param {string} projectId The project ID to use.
* @param {function} callback The callback function.
*/

function listDatasets (projectId, callback) {
// Instantiate a bigquery client
var bigquery = BigQuery({
projectId: projectId
});

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=getDatasets
bigquery.getDatasets(function (err, datasets) {
if (err) {
return callback(err);
Expand All @@ -95,31 +66,27 @@ function listDatasets (projectId, callback) {
return callback(null, datasets);
});
}
// [END list_datasets]

// [START get_dataset_size]
/**
* Calculate the size of the specified dataset.
*
* @param {string} datasetId The ID of the dataset.
* @param {string} projectId The project ID.
* @param {function} callback The callback function.
*/
// Control-flow helper library
var async = require('async');

function getDatasetSize (datasetId, projectId, callback) {
// Instantiate a bigquery client
var bigquery = BigQuery({
projectId: projectId
});
var dataset = bigquery.dataset(datasetId);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery/dataset
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=getTables
dataset.getTables(function (err, tables) {
if (err) {
return callback(err);
}

return async.map(tables, function (table, cb) {
// Fetch more detailed info for each table
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/table?method=get
table.get(function (err, tableInfo) {
if (err) {
return cb(err);
Expand All @@ -142,7 +109,6 @@ function getDatasetSize (datasetId, projectId, callback) {
});
}
// [END get_dataset_size]
// [END all]

// The command-line program
var cli = require('yargs');
Expand All @@ -161,13 +127,13 @@ var program = module.exports = {

cli
.demand(1)
.command('create <name>', 'Create a new dataset.', {}, function (options) {
program.createDataset(options.name, makeHandler());
.command('create <datasetId>', 'Create a new dataset with the specified ID.', {}, function (options) {
program.createDataset(options.datasetId, makeHandler());
})
.command('delete <datasetId>', 'Delete the specified dataset.', {}, function (options) {
.command('delete <datasetId>', 'Delete the dataset with the specified ID.', {}, function (options) {
program.deleteDataset(options.datasetId, makeHandler());
})
.command('list', 'List datasets in the authenticated project.', {}, function (options) {
.command('list', 'List datasets in the specified project.', {}, function (options) {
program.listDatasets(options.projectId, makeHandler(true, 'id'));
})
.command('size <datasetId>', 'Calculate the size of the specified dataset.', {}, function (options) {
Expand All @@ -181,13 +147,13 @@ cli
description: 'Optionally specify the project ID to use.',
global: true
})
.example('node $0 create my_dataset', 'Create a new dataset named "my_dataset".')
.example('node $0 delete my_dataset', 'Delete "my_dataset".')
.example('node $0 create my_dataset', 'Create a new dataset with the ID "my_dataset".')
.example('node $0 delete my_dataset', 'Delete a dataset identified as "my_dataset".')
.example('node $0 list', 'List datasets.')
.example('node $0 list -p bigquery-public-data', 'List datasets in a project other than the authenticated project.')
.example('node $0 list -p bigquery-public-data', 'List datasets in the "bigquery-public-data" project.')
.example('node $0 size my_dataset', 'Calculate the size of "my_dataset".')
.example('node $0 size hacker_news -p bigquery-public-data', 'Calculate the size of "bigquery-public-data:hacker_news".')
.wrap(100)
.wrap(120)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/docs');

Expand Down
143 changes: 60 additions & 83 deletions bigquery/queries.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,120 +24,97 @@

'use strict';

// [START auth]
// [START setup]
// By default, gcloud will authenticate using the service account file specified
// by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use the
// project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/guides/authentication
var BigQuery = require('@google-cloud/bigquery');
// [END setup]

// Instantiate the bigquery client
var bigquery = BigQuery();
// [END auth]

// [START sync_query]
/**
* Run a synchronous query.
* @param {string} query The BigQuery query to run, as a string.
* @param {function} callback Callback function to receive query results.
*/
function syncQuery (query, callback) {
if (!query) {
return callback(new Error('"query" is required!'));
}
function syncQuery (sqlQuery, callback) {
var bigquery = BigQuery();

// Construct query object.
// Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
var queryObj = {
query: query,
timeoutMs: 10000 // Time out after 10 seconds.
};

// Run query
bigquery.query(queryObj, function (err, rows) {
if (err) {
return callback(err);
}
var options = {
query: sqlQuery,

console.log('SyncQuery: found %d rows!', rows.length);
return callback(null, rows);
});
}
// [END sync_query]
// Time out after 10 seconds.
timeoutMs: 10000,

// [START async_query]
/**
* Run an asynchronous query.
* @param {string} query The BigQuery query to run, as a string.
* @param {function} callback Callback function to receive job data.
*/
function asyncQuery (query, callback) {
if (!query) {
return callback(new Error('"query" is required!'));
}

// Construct query object
// Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
var queryObj = {
query: query
// Use standard SQL syntax for queries.
// See: https://cloud.google.com/bigquery/sql-reference/
useLegacySql: false
};

// Submit query asynchronously
bigquery.startQuery(queryObj, function (err, job) {
// Run the query
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=query
bigquery.query(options, function (err, rows) {
if (err) {
return callback(err);
}

console.log('AsyncQuery: submitted job %s!', job.id);
return callback(null, job);
console.log('Received %d row(s)!', rows.length);
return callback(null, rows);
});
}

/**
* Poll an asynchronous query job for results.
* @param {object} jobId The ID of the BigQuery job to poll.
* @param {function} callback Callback function to receive query results.
*/
function asyncPoll (jobId, callback) {
if (!jobId) {
return callback(new Error('"jobId" is required!'));
}
function waitForJob (jobId, callback) {
var bigquery = BigQuery();

// Check for job status
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/job
var job = bigquery.job(jobId);
job.getMetadata(function (err, metadata) {
if (err) {
return callback(err);
}
console.log('Job status: %s', metadata.status.state);

// If job is done, get query results; if not, return an error.
if (metadata.status.state === 'DONE') {
job
.on('error', callback)
.on('complete', function (metadata) {
// The job is done, get query results
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/job?method=getQueryResults
job.getQueryResults(function (err, rows) {
if (err) {
return callback(err);
}

console.log('AsyncQuery: polled job %s; got %d rows!', jobId, rows.length);
console.log('Job complete, received %d row(s)!', rows.length);
return callback(null, rows);
});
} else {
return callback(new Error('Job %s is not done', jobId));
});
}

/**
 * Start a query job that uses standard SQL syntax and, once the job has been
 * started, hand its ID to waitForJob together with the caller's callback.
 *
 * @param {string} sqlQuery The query text sent as the `query` option.
 * @param {function} callback Invoked with the error if the job cannot be
 *     started; otherwise passed along to waitForJob.
 */
function asyncQuery (sqlQuery, callback) {
  // Handles the outcome of the startQuery call below.
  function onJobStarted (startErr, queryJob) {
    if (startErr) {
      return callback(startErr);
    }

    console.log('Started job: %s', queryJob.id);
    return waitForJob(queryJob.id, callback);
  }

  var client = BigQuery();

  // Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
  // useLegacySql: false selects standard SQL syntax.
  // See: https://cloud.google.com/bigquery/sql-reference/
  var request = { query: sqlQuery, useLegacySql: false };

  // See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=startQuery
  client.startQuery(request, onJobStarted);
}
// [END async_query]
// [END all]

// The command-line program
var cli = require('yargs');
var makeHandler = require('../utils').makeHandler;

var program = module.exports = {
asyncQuery: asyncQuery,
asyncPoll: asyncPoll,
waitForJob: waitForJob,
syncQuery: syncQuery,
bigquery: bigquery,
main: function (args) {
// Run the command-line program
cli.help().strict().parse(args).argv;
Expand All @@ -146,19 +123,19 @@ var program = module.exports = {

cli
.demand(1)
.command('sync <query>', 'Run a synchronous query.', {}, function (options) {
program.syncQuery(options.query, makeHandler());
.command('sync <sqlQuery>', 'Run the specified synchronous query.', {}, function (options) {
program.syncQuery(options.sqlQuery, makeHandler());
})
.command('async <query>', 'Start an asynchronous query.', {}, function (options) {
program.asyncQuery(options.query, makeHandler());
.command('async <sqlQuery>', 'Start the specified asynchronous query.', {}, function (options) {
program.asyncQuery(options.sqlQuery, makeHandler());
})
.command('poll <jobId>', 'Get the status of a job.', {}, function (options) {
program.asyncPoll(options.jobId, makeHandler());
.command('wait <jobId>', 'Wait for the specified job to complete and retrieve its results.', {}, function (options) {
program.waitForJob(options.jobId, makeHandler());
})
.example('node $0 sync "SELECT * FROM publicdata:samples.natality LIMIT 5;"')
.example('node $0 async "SELECT * FROM publicdata:samples.natality LIMIT 5;"')
.example('node $0 poll 12345')
.wrap(80)
.example('node $0 sync "SELECT * FROM `publicdata.samples.natality` LIMIT 5;"')
.example('node $0 async "SELECT * FROM `publicdata.samples.natality` LIMIT 5;"')
.example('node $0 wait job_VwckYXnR8yz54GBDMykIGnrc2')
.wrap(120)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/docs');

Expand Down
Loading

0 comments on commit aaa4f77

Please sign in to comment.