Skip to content

Commit

Permalink
Better error for external BigQuery tables. (apache#22178)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertwb authored and Konstantin Urysov committed Jul 14, 2022
1 parent ae938eb commit dc51c27
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,21 @@ private List<ResourceId> executeExtract(
.setUseAvroLogicalTypes(useAvroLogicalTypes)
.setDestinationUris(ImmutableList.of(destinationUri));

LOG.info("Starting BigQuery extract job: {}", jobId);
jobService.startExtractJob(jobRef, extract);
Job extractJob = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
Job extractJob;
try {
LOG.info("Starting BigQuery extract job: {}", jobId);
jobService.startExtractJob(jobRef, extract);
extractJob = jobService.pollJob(jobRef, JOB_POLL_MAX_RETRIES);
} catch (IOException exn) {
// The error messages thrown in this case are generic and misleading, so leave this breadcrumb
// in case it's the root cause.
LOG.warn(
"Error extracting table: {} "
+ "Note that external tables cannot be exported: "
+ "https://cloud.google.com/bigquery/docs/external-tables#external_table_limitations",
exn);
throw exn;
}
if (BigQueryHelpers.parseStatus(extractJob) != Status.SUCCEEDED) {
throw new IOException(
String.format(
Expand Down
47 changes: 29 additions & 18 deletions sdks/python/apache_beam/io/gcp/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,24 +986,35 @@ def _export_files(self, bq):
temp_location = self.options.view_as(GoogleCloudOptions).temp_location
gcs_location = bigquery_export_destination_uri(
self.gcs_location, temp_location, self._source_uuid)
if self.use_json_exports:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
self.table_reference,
bigquery_tools.FileFormat.JSON,
project=self._get_project(),
job_labels=job_labels,
include_header=False)
else:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
self.table_reference,
bigquery_tools.FileFormat.AVRO,
project=self._get_project(),
include_header=False,
job_labels=job_labels,
use_avro_logical_types=True)
bq.wait_for_bq_job(job_ref)
try:
if self.use_json_exports:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
self.table_reference,
bigquery_tools.FileFormat.JSON,
project=self._get_project(),
job_labels=job_labels,
include_header=False)
else:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
self.table_reference,
bigquery_tools.FileFormat.AVRO,
project=self._get_project(),
include_header=False,
job_labels=job_labels,
use_avro_logical_types=True)
bq.wait_for_bq_job(job_ref)
except Exception as exn: # pylint: disable=broad-except
# The error messages thrown in this case are generic and misleading,
# so leave this breadcrumb in case it's the root cause.
logging.warning(
"Error exporting table: %s. "
"Note that external tables cannot be exported: "
"https://cloud.google.com/bigquery/docs/external-tables"
"#external_table_limitations",
exn)
raise
metadata_list = FileSystems.match([gcs_location])[0].metadata_list

if isinstance(self.table_reference, vp.ValueProvider):
Expand Down
47 changes: 29 additions & 18 deletions sdks/python/apache_beam/io/gcp/bigquery_read_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,24 +332,35 @@ def _export_files(
self.gcs_location,
temp_location,
'%s%s' % (self._source_uuid, element.obj_id))
if self.use_json_exports:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
table_reference,
bigquery_tools.FileFormat.JSON,
project=self._get_project(),
job_labels=job_labels,
include_header=False)
else:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
table_reference,
bigquery_tools.FileFormat.AVRO,
project=self._get_project(),
include_header=False,
job_labels=job_labels,
use_avro_logical_types=True)
bq.wait_for_bq_job(job_ref)
try:
if self.use_json_exports:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
table_reference,
bigquery_tools.FileFormat.JSON,
project=self._get_project(),
job_labels=job_labels,
include_header=False)
else:
job_ref = bq.perform_extract_job([gcs_location],
export_job_name,
table_reference,
bigquery_tools.FileFormat.AVRO,
project=self._get_project(),
include_header=False,
job_labels=job_labels,
use_avro_logical_types=True)
bq.wait_for_bq_job(job_ref)
except Exception as exn: # pylint: disable=broad-except
# The error messages thrown in this case are generic and misleading,
# so leave this breadcrumb in case it's the root cause.
logging.warning(
"Error exporting table: %s. "
"Note that external tables cannot be exported: "
"https://cloud.google.com/bigquery/docs/external-tables"
"#external_table_limitations",
exn)
raise
metadata_list = FileSystems.match([gcs_location])[0].metadata_list

if isinstance(table_reference, ValueProvider):
Expand Down

0 comments on commit dc51c27

Please sign in to comment.