Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify BigQuery samples according to our standard. #207

Merged
merged 4 commits into from
Sep 7, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 30 additions & 64 deletions bigquery/datasets.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,79 +13,50 @@

'use strict';

// [START all]
// [START setup]
// By default, the client will authenticate using the service account file
// specified by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use
// the project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/google-cloud/latest/guides/authentication
// https://googlecloudplatform.github.io/google-cloud-node/#/docs/google-cloud/latest/guides/authentication
var BigQuery = require('@google-cloud/bigquery');

// Instantiate the bigquery client
var bigquery = BigQuery();
// [END setup]

// Control-flow helper library
var async = require('async');
function createDataset (datasetId, callback) {
var bigquery = BigQuery();
var dataset = bigquery.dataset(datasetId);

// [START create_dataset]
/**
* Create a new dataset in the authenticated project.
*
* @param {string} name The name for the new dataset.
* @param {function} callback The callback function.
*/
function createDataset (name, callback) {
var dataset = bigquery.dataset(name);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery
dataset.create(function (err, dataset) {
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=create
dataset.create(function (err, dataset, apiResponse) {
if (err) {
return callback(err);
}

console.log('Created dataset: %s', name);
return callback(null, dataset);
console.log('Created dataset: %s', datasetId);
return callback(null, dataset, apiResponse);
});
}
// [END create_dataset]

// [START delete_dataset]
/**
* Delete the specified dataset from the authenticated project.
*
* @param {string} name The name of the dataset to delete.
* @param {function} callback The callback function.
*/
function deleteDataset (name, callback) {
var dataset = bigquery.dataset(name);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery

/**
 * Delete the dataset with the specified ID.
 *
 * @param {string} datasetId The ID of the dataset to delete.
 * @param {function} callback Called with the error if the delete fails,
 *     otherwise called with `null` after the deletion has been logged.
 */
function deleteDataset (datasetId, callback) {
  var bigquery = BigQuery();
  var dataset = bigquery.dataset(datasetId);

  // See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=delete
  dataset.delete(function (err) {
    if (err) {
      return callback(err);
    }

    // Log exactly once, using the `datasetId` parameter. No `name` binding
    // exists in this function, so referencing it would throw before the
    // callback could be invoked.
    console.log('Deleted dataset: %s', datasetId);
    return callback(null);
  });
}
// [END delete_dataset]

// [START list_datasets]
/**
* List datasets in the authenticated project.
*
* @param {string} projectId The project ID to use.
* @param {function} callback The callback function.
*/

function listDatasets (projectId, callback) {
// Instantiate a bigquery client
var bigquery = BigQuery({
projectId: projectId
});

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=getDatasets
bigquery.getDatasets(function (err, datasets) {
if (err) {
return callback(err);
Expand All @@ -95,31 +66,27 @@ function listDatasets (projectId, callback) {
return callback(null, datasets);
});
}
// [END list_datasets]

// [START get_dataset_size]
/**
* Calculate the size of the specified dataset.
*
* @param {string} datasetId The ID of the dataset.
* @param {string} projectId The project ID.
* @param {function} callback The callback function.
*/
// Control-flow helper library
var async = require('async');

function getDatasetSize (datasetId, projectId, callback) {
// Instantiate a bigquery client
var bigquery = BigQuery({
projectId: projectId
});
var dataset = bigquery.dataset(datasetId);

// See https://googlecloudplatform.github.io/gcloud-node/#/docs/bigquery/latest/bigquery/dataset
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/dataset?method=getTables
dataset.getTables(function (err, tables) {
if (err) {
return callback(err);
}

return async.map(tables, function (table, cb) {
// Fetch more detailed info for each table
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/table?method=get
table.get(function (err, tableInfo) {
if (err) {
return cb(err);
Expand All @@ -142,7 +109,6 @@ function getDatasetSize (datasetId, projectId, callback) {
});
}
// [END get_dataset_size]
// [END all]

// The command-line program
var cli = require('yargs');
Expand All @@ -161,13 +127,13 @@ var program = module.exports = {

cli
.demand(1)
.command('create <name>', 'Create a new dataset.', {}, function (options) {
program.createDataset(options.name, makeHandler());
.command('create <datasetId>', 'Create a new dataset with the specified ID.', {}, function (options) {
program.createDataset(options.datasetId, makeHandler());
})
.command('delete <datasetId>', 'Delete the specified dataset.', {}, function (options) {
.command('delete <datasetId>', 'Delete the dataset with the specified ID.', {}, function (options) {
program.deleteDataset(options.datasetId, makeHandler());
})
.command('list', 'List datasets in the authenticated project.', {}, function (options) {
.command('list', 'List datasets in the specified project.', {}, function (options) {
program.listDatasets(options.projectId, makeHandler(true, 'id'));
})
.command('size <datasetId>', 'Calculate the size of the specified dataset.', {}, function (options) {
Expand All @@ -181,13 +147,13 @@ cli
description: 'Optionally specify the project ID to use.',
global: true
})
.example('node $0 create my_dataset', 'Create a new dataset named "my_dataset".')
.example('node $0 delete my_dataset', 'Delete "my_dataset".')
.example('node $0 create my_dataset', 'Create a new dataset with the ID "my_dataset".')
.example('node $0 delete my_dataset', 'Delete a dataset identified as "my_dataset".')
.example('node $0 list', 'List datasets.')
.example('node $0 list -p bigquery-public-data', 'List datasets in a project other than the authenticated project.')
.example('node $0 list -p bigquery-public-data', 'List datasets in the "bigquery-public-data" project.')
.example('node $0 size my_dataset', 'Calculate the size of "my_dataset".')
.example('node $0 size hacker_news -p bigquery-public-data', 'Calculate the size of "bigquery-public-data:hacker_news".')
.wrap(100)
.wrap(120)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/docs');

Expand Down
143 changes: 60 additions & 83 deletions bigquery/queries.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,120 +24,97 @@

'use strict';

// [START auth]
// [START setup]
// By default, gcloud will authenticate using the service account file specified
// by the GOOGLE_APPLICATION_CREDENTIALS environment variable and use the
// project specified by the GCLOUD_PROJECT environment variable. See
// https://googlecloudplatform.github.io/gcloud-node/#/docs/guides/authentication
var BigQuery = require('@google-cloud/bigquery');
// [END setup]

// Instantiate the bigquery client
var bigquery = BigQuery();
// [END auth]

// [START sync_query]
/**
* Run a synchronous query.
* @param {string} query The BigQuery query to run, as a string.
* @param {function} callback Callback function to receive query results.
*/
function syncQuery (query, callback) {
if (!query) {
return callback(new Error('"query" is required!'));
}
function syncQuery (sqlQuery, callback) {
var bigquery = BigQuery();

// Construct query object.
// Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
var queryObj = {
query: query,
timeoutMs: 10000 // Time out after 10 seconds.
};

// Run query
bigquery.query(queryObj, function (err, rows) {
if (err) {
return callback(err);
}
var options = {
query: sqlQuery,

console.log('SyncQuery: found %d rows!', rows.length);
return callback(null, rows);
});
}
// [END sync_query]
// Time out after 10 seconds.
timeoutMs: 10000,

// [START async_query]
/**
* Run an asynchronous query.
* @param {string} query The BigQuery query to run, as a string.
* @param {function} callback Callback function to receive job data.
*/
function asyncQuery (query, callback) {
if (!query) {
return callback(new Error('"query" is required!'));
}

// Construct query object
// Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
var queryObj = {
query: query
// Use standard SQL syntax for queries.
// See: https://cloud.google.com/bigquery/sql-reference/
useLegacySql: false
};

// Submit query asynchronously
bigquery.startQuery(queryObj, function (err, job) {
// Run the query
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=query
bigquery.query(options, function (err, rows) {
if (err) {
return callback(err);
}

console.log('AsyncQuery: submitted job %s!', job.id);
return callback(null, job);
console.log('Received %d row(s)!', rows.length);
return callback(null, rows);
});
}

/**
* Poll an asynchronous query job for results.
* @param {string} jobId The ID of the BigQuery job to poll.
* @param {function} callback Callback function to receive query results.
*/
function asyncPoll (jobId, callback) {
if (!jobId) {
return callback(new Error('"jobId" is required!'));
}
function waitForJob (jobId, callback) {
var bigquery = BigQuery();

// Check for job status
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/job
var job = bigquery.job(jobId);
job.getMetadata(function (err, metadata) {
if (err) {
return callback(err);
}
console.log('Job status: %s', metadata.status.state);

// If job is done, get query results; if not, return an error.
if (metadata.status.state === 'DONE') {
job
.on('error', callback)
.on('complete', function (metadata) {
// The job is done, get query results
// See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery/job?method=getQueryResults
job.getQueryResults(function (err, rows) {
if (err) {
return callback(err);
}

console.log('AsyncQuery: polled job %s; got %d rows!', jobId, rows.length);
console.log('Job complete, received %d row(s)!', rows.length);
return callback(null, rows);
});
} else {
return callback(new Error('Job %s is not done', jobId));
});
}

/**
 * Start a query job that uses standard SQL syntax and, once the job has been
 * started, pass its ID to waitForJob together with the caller's callback.
 *
 * @param {string} sqlQuery The SQL text to submit.
 * @param {function} callback Called with the error if the job could not be
 *     started; otherwise handed on to waitForJob.
 */
function asyncQuery (sqlQuery, callback) {
  // Completion handler for the start request.
  function onJobStarted (startError, startedJob) {
    if (!startError) {
      console.log('Started job: %s', startedJob.id);
      return waitForJob(startedJob.id, callback);
    }

    return callback(startError);
  }

  var client = BigQuery();

  // Query options list: https://cloud.google.com/bigquery/docs/reference/v2/jobs/query
  // `useLegacySql: false` selects standard SQL syntax.
  // See: https://cloud.google.com/bigquery/sql-reference/
  var jobConfig = {
    query: sqlQuery,
    useLegacySql: false
  };

  // Run the query asynchronously
  // See https://googlecloudplatform.github.io/google-cloud-node/#/docs/bigquery/latest/bigquery?method=startQuery
  client.startQuery(jobConfig, onJobStarted);
}
// [END async_query]
// [END all]

// The command-line program
var cli = require('yargs');
var makeHandler = require('../utils').makeHandler;

var program = module.exports = {
asyncQuery: asyncQuery,
asyncPoll: asyncPoll,
waitForJob: waitForJob,
syncQuery: syncQuery,
bigquery: bigquery,
main: function (args) {
// Run the command-line program
cli.help().strict().parse(args).argv;
Expand All @@ -146,19 +123,19 @@ var program = module.exports = {

cli
.demand(1)
.command('sync <query>', 'Run a synchronous query.', {}, function (options) {
program.syncQuery(options.query, makeHandler());
.command('sync <sqlQuery>', 'Run the specified synchronous query.', {}, function (options) {
program.syncQuery(options.sqlQuery, makeHandler());
})
.command('async <query>', 'Start an asynchronous query.', {}, function (options) {
program.asyncQuery(options.query, makeHandler());
.command('async <sqlQuery>', 'Start the specified asynchronous query.', {}, function (options) {
program.asyncQuery(options.sqlQuery, makeHandler());
})
.command('poll <jobId>', 'Get the status of a job.', {}, function (options) {
program.asyncPoll(options.jobId, makeHandler());
.command('wait <jobId>', 'Wait for the specified job to complete and retrieve its results.', {}, function (options) {
program.waitForJob(options.jobId, makeHandler());
})
.example('node $0 sync "SELECT * FROM publicdata:samples.natality LIMIT 5;"')
.example('node $0 async "SELECT * FROM publicdata:samples.natality LIMIT 5;"')
.example('node $0 poll 12345')
.wrap(80)
.example('node $0 sync "SELECT * FROM `publicdata.samples.natality` LIMIT 5;"')
.example('node $0 async "SELECT * FROM `publicdata.samples.natality` LIMIT 5;"')
.example('node $0 wait job_VwckYXnR8yz54GBDMykIGnrc2')
.wrap(120)
.recommendCommands()
.epilogue('For more information, see https://cloud.google.com/bigquery/docs');

Expand Down
Loading