Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support BI Engine statistics in query job #1144

Merged
merged 4 commits into from
Feb 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions google/cloud/bigquery/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,46 @@ class AutoRowIDs(enum.Enum):
GENERATE_UUID = enum.auto()


class BiEngineReasonCode(enum.Enum):
    """Reason codes explaining why BI Engine did not accelerate a query."""

    CODE_UNSPECIFIED = enum.auto()
    """No specific BiEngineReason was given."""

    NO_RESERVATION = enum.auto()
    """There is no reservation that BI Engine could use for acceleration."""

    INSUFFICIENT_RESERVATION = enum.auto()
    """The available memory is too small for BI Engine acceleration."""

    UNCACHED = enum.auto()
    """The data is not cached, so BI Engine could not accelerate it."""

    UNSUPPORTED_SQL_TEXT = enum.auto()
    """BI Engine does not support acceleration of this particular SQL text."""

    INPUT_TOO_LARGE = enum.auto()
    """The input is too large for BI Engine to accelerate."""

    OTHER_REASON = enum.auto()
    """Catch-all for every other case of partial or disabled acceleration."""

    TABLE_EXCLUDED = enum.auto()
    """At least one table was not eligible for BI Engine acceleration."""


class BiEngineMode(enum.Enum):
    """Mode of BI Engine acceleration that was performed for a query."""

    ACCELERATION_MODE_UNSPECIFIED = enum.auto()
    """The BI Engine mode was not specified."""

    DISABLED = enum.auto()
    """BI Engine disabled the acceleration."""

    PARTIAL = enum.auto()
    """Only part of the query was accelerated using BI Engine."""

    FULL = enum.auto()
    """All of the query was accelerated using BI Engine."""


class Compression(object):
"""The compression type to use for exported files. The default value is
:attr:`NONE`.
Expand Down
54 changes: 54 additions & 0 deletions google/cloud/bigquery/job/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.query import (
Expand Down Expand Up @@ -121,6 +122,50 @@ def _to_api_repr_table_defs(value):
return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}


class BiEngineReason(typing.NamedTuple):
    """Reason for BI Engine acceleration failure

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason
    """

    code: BiEngineReasonCode = BiEngineReasonCode.CODE_UNSPECIFIED
    """Reason code, parsed from the ``"code"`` field of the API resource."""

    reason: str = ""
    """Explanatory text, taken from the ``"message"`` field of the API resource."""

    @classmethod
    def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason":
        """Build a ``BiEngineReason`` from its API resource representation.

        Args:
            reason: Mapping that may contain a ``"code"`` key (the name of a
                ``BiEngineReasonCode`` member) and a ``"message"`` key.

        Returns:
            The parsed reason. A missing ``"code"`` maps to
            ``CODE_UNSPECIFIED`` and a missing ``"message"`` to ``""``, the
            same defaults the fields themselves declare.

        Raises:
            KeyError: If ``"code"`` is present but does not name a
                ``BiEngineReasonCode`` member.
        """
        # Fall back to the field defaults instead of failing with
        # ``KeyError(None)`` / storing ``None`` in a ``str`` field when the
        # resource omits a key.
        code_name = reason.get("code", BiEngineReasonCode.CODE_UNSPECIFIED.name)
        message = reason.get("message", "")
        return cls(BiEngineReasonCode[code_name], message)


class BiEngineStats(typing.NamedTuple):
    """Statistics for a BI Engine query

    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics
    """

    mode: BiEngineMode = BiEngineMode.ACCELERATION_MODE_UNSPECIFIED
    """Specifies which mode of BI Engine acceleration was performed (if any)."""

    # NOTE: this default list is created once at class definition, so every
    # instance built without ``reasons`` shares the same object; treat it as
    # read-only. It is kept as a list so defaults still compare equal to ``[]``.
    reasons: List[BiEngineReason] = []
    """Contains explanatory messages in case of DISABLED / PARTIAL acceleration."""

    @classmethod
    def from_api_repr(cls, stats: Dict[str, typing.Any]) -> "BiEngineStats":
        """Build a ``BiEngineStats`` from its API resource representation.

        Args:
            stats: Mapping that may contain ``"biEngineMode"`` (the name of a
                ``BiEngineMode`` member) and ``"biEngineReasons"`` (a list of
                reason resources).

        Returns:
            The parsed statistics. A missing ``"biEngineMode"`` maps to
            ``ACCELERATION_MODE_UNSPECIFIED``; a missing or ``None``
            ``"biEngineReasons"`` yields a fresh empty list.

        Raises:
            KeyError: If ``"biEngineMode"`` is present but does not name a
                ``BiEngineMode`` member.
        """
        # Default the mode rather than raising ``KeyError(None)`` when the
        # resource omits the field.
        mode_name = stats.get(
            "biEngineMode", BiEngineMode.ACCELERATION_MODE_UNSPECIFIED.name
        )
        raw_reasons = stats.get("biEngineReasons") or []

        return cls(
            BiEngineMode[mode_name],
            [BiEngineReason.from_api_repr(raw) for raw in raw_reasons],
        )


class DmlStats(typing.NamedTuple):
"""Detailed statistics for DML statements.
Expand Down Expand Up @@ -1191,6 +1236,15 @@ def dml_stats(self) -> Optional[DmlStats]:
else:
return DmlStats.from_api_repr(stats)

@property
def bi_engine_stats(self) -> Optional[BiEngineStats]:
    """BI Engine statistics reported for this job, if any.

    Returns:
        ``None`` when the job statistics contain no ``"biEngineStatistics"``
        entry, otherwise the ``BiEngineStats`` parsed from that entry.
    """
    stats = self._job_statistics().get("biEngineStatistics")

    # Only parse when the entry exists; the earlier check was inverted and
    # returned None exactly when statistics were available.
    if stats is None:
        return None
    else:
        return BiEngineStats.from_api_repr(stats)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we've got the logic flipped here. (Possibly an indication we need some unit test coverage for this?)

Suggested change
if stats is not None:
return None
else:
return BiEngineStats.from_api_repr(stats)
if stats is None:
return None
else:
return BiEngineStats.from_api_repr(stats)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a test much like this one:

def test_ddl_target_routine(self):

That checks that it can be None if the stats aren't present, otherwise return the expected object.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My bad! Added a test case as well.


def _blocking_poll(self, timeout=None, **kwargs):
self._done_timeout = timeout
self._transport_timeout = timeout
Expand Down
57 changes: 57 additions & 0 deletions tests/unit/job/test_query_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,63 @@
# limitations under the License.

from .helpers import _Base
from google.cloud.bigquery.enums import BiEngineMode, BiEngineReasonCode


class TestBiEngineStats:
    """Unit tests for ``BiEngineStats`` defaults and ``from_api_repr`` parsing."""

    @staticmethod
    def _get_target_class():
        """Return the class under test."""
        from google.cloud.bigquery.job.query import BiEngineStats

        return BiEngineStats

    def _make_one(self, *args, **kw):
        """Instantiate the class under test with the given arguments."""
        target = self._get_target_class()
        return target(*args, **kw)

    def test_ctor_defaults(self):
        """With no arguments the mode is unspecified and there are no reasons."""
        stats = self._make_one()
        assert stats.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED
        assert stats.reasons == []

    def test_from_api_repr_unspecified(self):
        """An unspecified mode without reasons parses to an empty reason list."""
        target = self._get_target_class()
        resource = {"biEngineMode": "ACCELERATION_MODE_UNSPECIFIED"}

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == BiEngineMode.ACCELERATION_MODE_UNSPECIFIED
        assert parsed.reasons == []

    def test_from_api_repr_full(self):
        """A fully accelerated query parses to FULL with no reasons."""
        target = self._get_target_class()
        resource = {"biEngineMode": "FULL"}

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == BiEngineMode.FULL
        assert parsed.reasons == []

    def test_from_api_repr_disabled(self):
        """A disabled query carries its reason code and message through."""
        target = self._get_target_class()
        message = "Unable to support input table xyz due to an internal error."
        resource = {
            "biEngineMode": "DISABLED",
            "biEngineReasons": [{"code": "OTHER_REASON", "message": message}],
        }

        parsed = target.from_api_repr(resource)

        assert isinstance(parsed, target)
        assert parsed.mode == BiEngineMode.DISABLED

        first = parsed.reasons[0]
        assert first.code == BiEngineReasonCode.OTHER_REASON
        assert first.reason == message


class TestDmlStats:
Expand Down