Skip to content

Commit

Permalink
BigQuery: Add support to Dataset for project_ids with org prefix. (go…
Browse files Browse the repository at this point in the history
  • Loading branch information
emar-kar committed Sep 18, 2019
1 parent 3c7f4fa commit 7502a1a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
25 changes: 20 additions & 5 deletions bigquery/google/cloud/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import six
import copy
import re

import google.cloud._helpers
from google.cloud.bigquery import _helpers
Expand All @@ -26,6 +27,14 @@
from google.cloud.bigquery.table import TableReference


# Matches a fully-qualified dataset ID whose project ID carries a
# colon-separated prefix, e.g. "google.com:string-project.string_dataset".
#   project_id: one or more non-whitespace characters, a ":", then one or
#               more characters other than "." (so the prefix part before
#               the colon may itself contain dots).
#   dataset_id: one or more characters other than "." following the
#               separating ".", anchored by "$" so no further ".part"
#               (such as a table ID) may follow.
# re.VERBOSE makes the newlines surrounding the pattern text insignificant.
_PROJECT_PREFIX_PATTERN = re.compile(
r"""
(?P<project_id>\S+\:[^.]+)\.(?P<dataset_id>[^.]+)$
""",
re.VERBOSE,
)


def _get_table_reference(self, table_id):
"""Constructs a TableReference.
Expand Down Expand Up @@ -269,7 +278,7 @@ def from_string(cls, dataset_id, default_project=None):
Args:
dataset_id (str):
A dataset ID in standard SQL format. If ``default_project``
is not specified, this must included both the project ID and
is not specified, this must include both the project ID and
the dataset ID, separated by ``.``.
default_project (str):
Optional. The project ID to use when ``dataset_id`` does not
Expand All @@ -290,13 +299,19 @@ def from_string(cls, dataset_id, default_project=None):
"""
output_dataset_id = dataset_id
output_project_id = default_project
parts = dataset_id.split(".")
with_prefix = _PROJECT_PREFIX_PATTERN.match(dataset_id)
if with_prefix is None:
parts = dataset_id.split(".")
else:
project_id = with_prefix.group("project_id")
dataset_id = with_prefix.group("dataset_id")
parts = [project_id, dataset_id]

if len(parts) == 1 and not default_project:
raise ValueError(
"When default_project is not set, dataset_id must be a "
"fully-qualified dataset ID in standard SQL format. "
'e.g. "project.dataset_id", got {}'.format(dataset_id)
"fully-qualified dataset ID in standard SQL format, "
'e.g., "project.dataset_id" got {}'.format(dataset_id)
)
elif len(parts) == 2:
output_project_id, output_dataset_id = parts
Expand Down Expand Up @@ -554,7 +569,7 @@ def from_string(cls, full_dataset_id):
Args:
full_dataset_id (str):
A fully-qualified dataset ID in standard SQL format. Must
included both the project ID and the dataset ID, separated by
include both the project ID and the dataset ID, separated by
``.``.
Returns:
Expand Down
16 changes: 16 additions & 0 deletions bigquery/tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,27 @@ def test_from_string(self):
self.assertEqual(got.project, "string-project")
self.assertEqual(got.dataset_id, "string_dataset")

def test_from_string_w_prefix(self):
    """A ``prefix:project`` project ID is kept whole when parsing."""
    parsed = self._get_target_class().from_string(
        "google.com:string-project.string_dataset"
    )
    # Compare both components at once: the colon-prefixed project must not
    # be split on the "." inside "google.com".
    self.assertEqual(
        (parsed.project, parsed.dataset_id),
        ("google.com:string-project", "string_dataset"),
    )

def test_from_string_legacy_string(self):
    """A ``project:dataset`` string (no ``.`` separator) raises ValueError."""
    target = self._get_target_class()
    self.assertRaises(
        ValueError, target.from_string, "string-project:string_dataset"
    )

def test_from_string_w_incorrect_prefix(self):
    """A prefix joined with ``.`` instead of ``:`` raises ValueError."""
    target = self._get_target_class()
    self.assertRaises(
        ValueError, target.from_string, "google.com.string-project.dataset_id"
    )

def test_from_string_w_prefix_and_too_many_parts(self):
    """A prefixed ID with an extra trailing ``.part`` raises ValueError."""
    target = self._get_target_class()
    self.assertRaises(
        ValueError,
        target.from_string,
        "google.com:string-project.dataset_id.table_id",
    )

def test_from_string_not_fully_qualified(self):
cls = self._get_target_class()
with self.assertRaises(ValueError):
Expand Down

0 comments on commit 7502a1a

Please sign in to comment.