From e1aa9218ad22f85c9a6cab8b61d013779376a582 Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Thu, 8 Dec 2022 15:53:38 -0800 Subject: [PATCH] deps: update dependencies (#1282) * update dependencies * deps: pyarrow extras * clean up comments * add test pyarrow skips * replace storage checks * update tests * update tests * Update setup.py * update system tests * update verify_pandas_imports * add pyarrow guards * add datetime check * change pyarrow import * update * add pyarrow skips * fix types * lint * Update google/cloud/bigquery/client.py Co-authored-by: Tim Swast * update pyarrow version * update test * lint * update pyarrow req * update noxfile * remove bignum check * remove comments * add test importorskip * update test * update test * update dependency * change version * update imports Co-authored-by: Anthonios Partheniou Co-authored-by: Tim Swast --- docs/snippets.py | 5 + google/cloud/bigquery/__init__.py | 5 + google/cloud/bigquery/_helpers.py | 74 +++++++++- google/cloud/bigquery/_pandas_helpers.py | 127 +++++++++------- google/cloud/bigquery/client.py | 55 ++++++- google/cloud/bigquery/exceptions.py | 25 ++++ google/cloud/bigquery/job/query.py | 6 +- google/cloud/bigquery/magics/magics.py | 11 ++ google/cloud/bigquery/table.py | 39 ++++- setup.py | 25 +++- testing/constraints-3.7.txt | 4 +- tests/system/test_client.py | 24 +++- tests/system/test_pandas.py | 27 ++-- tests/unit/job/test_query_pandas.py | 32 ++++- tests/unit/test__helpers.py | 71 +++++++++ tests/unit/test__pandas_helpers.py | 117 +++++++++++++-- tests/unit/test_client.py | 176 ++++++++++++++++++++++- tests/unit/test_dbapi__helpers.py | 6 + tests/unit/test_dbapi_connection.py | 22 ++- tests/unit/test_dbapi_cursor.py | 25 +++- tests/unit/test_magics.py | 77 +++++++++- tests/unit/test_table.py | 176 +++++++++++++++++++++-- tests/unit/test_table_pandas.py | 2 +- 23 files changed, 1013 insertions(+), 118 deletions(-) create mode 100644 google/cloud/bigquery/exceptions.py diff --git a/docs/snippets.py b/docs/snippets.py index 238fd52c3..05e4fa378 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -31,6 +31,11 @@ except (ImportError, AttributeError): pandas = None +try: + import pyarrow +except (ImportError, AttributeError): + pyarrow = None + from google.api_core.exceptions import InternalServerError from google.api_core.exceptions import ServiceUnavailable from google.api_core.exceptions import TooManyRequests diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 5a4520476..ebd5b3109 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -42,6 +42,8 @@ from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -195,6 +197,9 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", + "LegacyPyarrowError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b59bc86d3..014a721a8 100644 --- a/google/cloud/bigquery/_helpers.py +++ 
b/google/cloud/bigquery/_helpers.py @@ -20,7 +20,7 @@ import math import re import os -from typing import Optional, Union +from typing import Any, Optional, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -32,6 +32,11 @@ import packaging.version +from google.cloud.bigquery.exceptions import ( + LegacyBigQueryStorageError, + LegacyPyarrowError, +) + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" _TIMEONLY_W_MICROS = "%H:%M:%S.%f" @@ -50,6 +55,10 @@ r"(?P-?)(?P\d+):(?P\d+):(?P\d+)\.?(?P\d*)?$" ) +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") + +_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0") + _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST" @@ -83,7 +92,7 @@ def installed_version(self) -> packaging.version.Version: getattr(bigquery_storage, "__version__", "0.0.0") ) - return self._installed_version + return self._installed_version # type: ignore @property def is_read_session_optional(self) -> bool: @@ -93,6 +102,29 @@ def is_read_session_optional(self) -> bool: """ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= {_MIN_BQ_STORAGE_VERSION} (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + class PyarrowVersions: """Version comparisons for pyarrow package.""" @@ -120,6 +152,44 @@ def installed_version(self) -> packaging.version.Version: def use_compliant_nested_type(self) -> bool: return self.installed_version.major >= 4 + def try_import(self, raise_if_error: bool = False) -> Any: + """Verify that a recent enough version of pyarrow extra is + installed. + + The function assumes that pyarrow extra is installed, and should thus + be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. + + Returns: + The ``pyarrow`` module or ``None``. + + Raises: + LegacyPyarrowError: + If the pyarrow package is outdated and ``raise_if_error`` is ``True``. + """ + try: + import pyarrow + except ImportError as exc: # pragma: NO COVER + if raise_if_error: + raise LegacyPyarrowError( + f"pyarrow package not found. Install pyarrow version >= {_MIN_PYARROW_VERSION}." + ) from exc + return None + + if self.installed_version < _MIN_PYARROW_VERSION: + if raise_if_error: + msg = ( + "Dependency pyarrow is outdated, please upgrade " + f"it to version >= {_MIN_PYARROW_VERSION} (version found: {self.installed_version})." 
+ ) + raise LegacyPyarrowError(msg) + return None + + return pyarrow + BQ_STORAGE_VERSIONS = BQStorageVersions() PYARROW_VERSIONS = PyarrowVersions() diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 0d05f53a3..3d7e7d793 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -22,6 +22,11 @@ import queue import warnings +from packaging import version + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -43,9 +48,7 @@ db_dtypes_import_exception = exc date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype - -import pyarrow # type: ignore -import pyarrow.parquet # type: ignore +pyarrow = _helpers.PYARROW_VERSIONS.try_import() try: # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` @@ -77,10 +80,6 @@ def _to_wkb(v): # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True -from google.cloud.bigquery import _helpers -from google.cloud.bigquery import schema - - _LOGGER = logging.getLogger(__name__) _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. @@ -141,52 +140,65 @@ def pyarrow_timestamp(): return pyarrow.timestamp("us", tz="UTC") -# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py -# When modifying it be sure to update it there as well. -BQ_TO_ARROW_SCALARS = { - "BIGNUMERIC": pyarrow_bignumeric, - "BOOL": pyarrow.bool_, - "BOOLEAN": pyarrow.bool_, - "BYTES": pyarrow.binary, - "DATE": pyarrow.date32, - "DATETIME": pyarrow_datetime, - "FLOAT": pyarrow.float64, - "FLOAT64": pyarrow.float64, - "GEOGRAPHY": pyarrow.string, - "INT64": pyarrow.int64, - "INTEGER": pyarrow.int64, - "NUMERIC": pyarrow_numeric, - "STRING": pyarrow.string, - "TIME": pyarrow_time, - "TIMESTAMP": pyarrow_timestamp, -} -ARROW_SCALAR_IDS_TO_BQ = { - # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes - pyarrow.bool_().id: "BOOL", - pyarrow.int8().id: "INT64", - pyarrow.int16().id: "INT64", - pyarrow.int32().id: "INT64", - pyarrow.int64().id: "INT64", - pyarrow.uint8().id: "INT64", - pyarrow.uint16().id: "INT64", - pyarrow.uint32().id: "INT64", - pyarrow.uint64().id: "INT64", - pyarrow.float16().id: "FLOAT64", - pyarrow.float32().id: "FLOAT64", - pyarrow.float64().id: "FLOAT64", - pyarrow.time32("ms").id: "TIME", - pyarrow.time64("ns").id: "TIME", - pyarrow.timestamp("ns").id: "TIMESTAMP", - pyarrow.date32().id: "DATE", - pyarrow.date64().id: "DATETIME", # because millisecond resolution - pyarrow.binary().id: "BYTES", - pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", -} +if pyarrow: + # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py + # When modifying it be sure to update it there as well. 
+ BQ_TO_ARROW_SCALARS = { + "BOOL": pyarrow.bool_, + "BOOLEAN": pyarrow.bool_, + "BYTES": pyarrow.binary, + "DATE": pyarrow.date32, + "DATETIME": pyarrow_datetime, + "FLOAT": pyarrow.float64, + "FLOAT64": pyarrow.float64, + "GEOGRAPHY": pyarrow.string, + "INT64": pyarrow.int64, + "INTEGER": pyarrow.int64, + "NUMERIC": pyarrow_numeric, + "STRING": pyarrow.string, + "TIME": pyarrow_time, + "TIMESTAMP": pyarrow_timestamp, + } + ARROW_SCALAR_IDS_TO_BQ = { + # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes + pyarrow.bool_().id: "BOOL", + pyarrow.int8().id: "INT64", + pyarrow.int16().id: "INT64", + pyarrow.int32().id: "INT64", + pyarrow.int64().id: "INT64", + pyarrow.uint8().id: "INT64", + pyarrow.uint16().id: "INT64", + pyarrow.uint32().id: "INT64", + pyarrow.uint64().id: "INT64", + pyarrow.float16().id: "FLOAT64", + pyarrow.float32().id: "FLOAT64", + pyarrow.float64().id: "FLOAT64", + pyarrow.time32("ms").id: "TIME", + pyarrow.time64("ns").id: "TIME", + pyarrow.timestamp("ns").id: "TIMESTAMP", + pyarrow.date32().id: "DATE", + pyarrow.date64().id: "DATETIME", # because millisecond resolution + pyarrow.binary().id: "BYTES", + pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + } + + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + +else: # pragma: NO COVER + BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER + ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER + + BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { "GEOGRAPHY": { b"ARROW:extension:name": b"google:sqlType:geography", @@ -480,6 +492,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # If schema detection was not successful for all columns, also try with # pyarrow, if available. if unknown_type_fields: + if not pyarrow: + msg = "Could not determine the type of columns: {}".format( + ", ".join(field.name for field in unknown_type_fields) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. + # The augment_schema() helper itself will also issue unknown type # warnings if detection still fails for any of the fields. bq_schema_out = augment_schema(dataframe, bq_schema_out) @@ -654,6 +673,8 @@ def dataframe_to_parquet( This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``. 
""" + pyarrow = _helpers.PYARROW_VERSIONS.try_import(raise_if_error=True) + import pyarrow.parquet # type: ignore kwargs = ( diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1f3647e71..1885ab67e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile import typing from typing import ( @@ -44,6 +45,13 @@ import uuid import warnings +try: + import pyarrow # type: ignore + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) +except ImportError: # pragma: NO COVER + pyarrow = None + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -56,9 +64,14 @@ import google.cloud._helpers # type: ignore from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error -from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( - DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, -) + +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore + from google.cloud.bigquery import _job_helpers from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id @@ -67,6 +80,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._helpers import _get_bigquery_host +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _DEFAULT_HOST from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -75,6 +89,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -144,6 +159,9 @@ TIMEOUT_HEADER = "X-Server-Timeout" +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -519,8 +537,20 @@ def _ensure_bqstorage_client( Returns: A BigQuery Storage API client. """ - from google.cloud import bigquery_storage + try: + from google.cloud import bigquery_storage # type: ignore + except ImportError: + warnings.warn( + "Cannot create BigQuery Storage client, the dependency " + "google-cloud-bigquery-storage is not installed." + ) + return None + try: + BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None if bqstorage_client is None: bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=self._credentials, @@ -2529,6 +2559,9 @@ def load_table_from_dataframe( google.cloud.bigquery.job.LoadJob: A new load job. Raises: + ValueError: + If a usable parquet engine cannot be found. This method + requires :mod:`pyarrow` to be installed. 
TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. @@ -2566,6 +2599,10 @@ def load_table_from_dataframe( ) ) + if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") + if location is None: location = self.location @@ -2621,6 +2658,16 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. " + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) + if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000..2bab97fea --- /dev/null +++ b/google/cloud/bigquery/exceptions.py @@ -0,0 +1,25 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" + + +class LegacyPyarrowError(BigQueryError): + """Raised when too old a version of pyarrow package is detected at runtime.""" diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b0286deae..e32e74129 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1593,6 +1593,10 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) @@ -1694,7 +1698,7 @@ def to_dataframe( # that should only exist here in the QueryJob method. 
def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 613cc1b58..f92f77541 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -747,6 +747,17 @@ def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None + try: + from google.cloud import bigquery_storage # type: ignore # noqa: F401 + except ImportError as err: + customized_error = ImportError( + "The default BigQuery Storage API client cannot be used, install " + "the missing google-cloud-bigquery-storage and pyarrow packages " + "to use it. Alternatively, use the classic REST API by specifying " + "the --use_rest_api magic option." + ) + raise customized_error from err + try: from google.api_core.gapic_v1 import client_info as gapic_client_info except ImportError as err: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 96888d62d..a2110a9fb 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,7 +29,10 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow # type: ignore +try: + import pyarrow # type: ignore +except ImportError: # pragma: NO COVER + pyarrow = None try: import geopandas # type: ignore @@ -52,6 +55,7 @@ import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -63,8 +67,9 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas - import geopandas - from google.cloud import bigquery_storage + import pyarrow + import geopandas # type: ignore + from google.cloud import bigquery_storage # type: ignore from google.cloud.bigquery.dataset import DatasetReference @@ -72,6 +77,10 @@ "The geopandas library is not installed, please install " "geopandas to use the to_geodataframe() function." ) +_NO_PYARROW_ERROR = ( + "The pyarrow library is not installed, please install " + "pyarrow to use the to_arrow() function." +) _NO_SHAPELY_ERROR = ( "The shapely library is not installed, please install " "shapely to use the geography_as_object option." 
@@ -1585,6 +1594,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): if self.max_results is not None: return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers.BQ_STORAGE_VERSIONS.verify_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1654,7 +1674,7 @@ def _to_page_iterable( def to_arrow_iterable( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1761,8 +1781,15 @@ def to_arrow( headers from the query results. The column headers are derived from the destination table's schema. + Raises: + ValueError: If the :mod:`pyarrow` library cannot be imported. + + .. versionadded:: 1.17.0 """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): @@ -2041,7 +2068,7 @@ def __can_cast_timestamp_ns(column): # changes to job.QueryJob.to_geodataframe() def to_geodataframe( self, - bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Dict[str, Any] = None, progress_bar_type: str = None, create_bqstorage_client: bool = True, @@ -2195,6 +2222,8 @@ def to_arrow( Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. """ + if pyarrow is None: + raise ValueError(_NO_PYARROW_ERROR) return pyarrow.Table.from_arrays(()) def to_dataframe( diff --git a/setup.py b/setup.py index 5fc694c6f..9e1bfbbce 100644 --- a/setup.py +++ b/setup.py @@ -34,8 +34,7 @@ # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0", - "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", - "proto-plus >= 1.22.0, <2.0.0dev", + "proto-plus >= 1.15.0, <2.0.0dev", # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x # Until this issue is closed # https://github.com/googleapis/google-cloud-python/issues/10566 @@ -44,16 +43,30 @@ "packaging >= 14.3, <22.0.0dev", "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. "python-dateutil >= 2.7.2, <3.0dev", - "pyarrow >= 3.0.0, < 11.0dev", "requests >= 2.21.0, < 3.0.0dev", ] +pyarrow_dependency = "pyarrow >= 3.0.0" extras = { # Keep the no-op bqstorage extra for backward compatibility. # See: https://github.com/googleapis/python-bigquery/issues/757 - "bqstorage": [], - "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"], + "bqstorage": [ + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. 
+ # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + pyarrow_dependency, + ], + "pandas": [ + "pandas>=1.1.0", + pyarrow_dependency, + "db-dtypes>=0.3.0,<2.0.0dev", + ], "ipywidgets": ["ipywidgets==7.7.1"], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <2.0dev"], "ipython": ["ipython>=7.0.1,!=8.1.0"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt index 2c5b169db..149d6c496 100644 --- a/testing/constraints-3.7.txt +++ b/testing/constraints-3.7.txt @@ -23,6 +23,6 @@ protobuf==3.19.5 pyarrow==3.0.0 python-dateutil==2.7.3 requests==2.21.0 -Shapely==1.6.4.post2 +Shapely==1.8.4 six==1.13.0 -tqdm==4.7.4 \ No newline at end of file +tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 25edc18e1..575898209 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -42,14 +42,11 @@ from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums -from google.cloud import bigquery_storage from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient import psutil import pytest -import pyarrow -import pyarrow.types from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState from test_utils.retry import RetryResult @@ -57,6 +54,16 @@ from . import helpers +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None JOB_TIMEOUT = 120 # 2 minutes DATA_PATH = pathlib.Path(__file__).parent.parent / "data" @@ -1738,6 +1745,10 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials @@ -1796,6 +1807,9 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() conn_count_start = len(current_process.connections()) @@ -2263,6 +2277,10 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import WriteDisposition diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 34e4243c4..91305b450 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -27,7 +27,7 @@ import pytest from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery import enums from . 
import helpers @@ -36,6 +36,9 @@ pandas = pytest.importorskip("pandas", minversion="0.23.0") numpy = pytest.importorskip("numpy") +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") @@ -373,10 +376,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) num_rows = 100 @@ -390,10 +393,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), - ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), + ("bignum_col", nulls), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -469,10 +472,10 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), ) df_data = [ @@ -502,14 +505,6 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -525,6 +520,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ] df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index a45401664..a2444efdd 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -17,12 +17,15 @@ import json import mock -import pyarrow import pytest -from google.cloud import bigquery_storage -import google.cloud.bigquery_storage_v1.reader -import google.cloud.bigquery_storage_v1.services.big_query_read.client + +try: + from google.cloud import bigquery_storage + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.services.big_query_read.client +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None try: import pandas @@ -47,6 +50,12 @@ pandas = pytest.importorskip("pandas") +try: + import pyarrow + import pyarrow.types +except ImportError: # pragma: NO COVER + pyarrow = None + @pytest.fixture def table_read_options_kwarg(): @@ -89,6 +98,9 @@ def test__contains_order_by(query, expected): assert not mut._contains_order_by(query) +@pytest.mark.skipif( + bigquery_storage is None, 
reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "query", ( @@ -179,6 +191,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class @@ -265,6 +278,7 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_arrow_max_results_no_progress_bar(): from google.cloud.bigquery import table from google.cloud.bigquery.job import QueryJob as target_class @@ -300,6 +314,7 @@ def test_to_arrow_max_results_no_progress_bar(): assert tbl.num_rows == 2 +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): @@ -356,6 +371,7 @@ def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): @@ -408,6 +424,7 @@ def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm") def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock): @@ -510,6 +527,9 @@ def test_to_dataframe_ddl_query(): assert len(df) == 0 +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class @@ -584,6 +604,9 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): bqstorage_client.read_rows.assert_called_once_with(stream_id) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_to_dataframe_bqstorage_no_pyarrow_compression(): from google.cloud.bigquery.job import QueryJob as target_class @@ -629,6 +652,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2e714c707..4fb86f665 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,7 +19,18 @@ import mock +try: + from google.cloud import bigquery_storage # type: ignore +except ImportError: # pragma: NO COVER + bigquery_storage = None +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") class TestBQStorageVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -32,6 +43,37 @@ def _object_under_test(self): return _helpers.BQStorageVersions() + def _call_fut(self): + from google.cloud.bigquery import _helpers + + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", 
new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: 0.0.0" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() @@ -58,6 +100,7 @@ def test_is_read_session_optional_false(self): assert not versions.is_read_session_optional +@unittest.skipIf(pyarrow is None, "Requires `pyarrow`") class TestPyarrowVersions(unittest.TestCase): def tearDown(self): from google.cloud.bigquery import _helpers @@ -70,6 +113,34 @@ def _object_under_test(self): return _helpers.PyarrowVersions() + def _call_try_import(self, **kwargs): + from google.cloud.bigquery import _helpers + + _helpers.PYARROW_VERSIONS._installed_version = None + return _helpers.PYARROW_VERSIONS.try_import(**kwargs) + + def test_try_import_raises_no_error_w_recent_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="5.0.0"): + try: + pyarrow = self._call_try_import(raise_if_error=True) + self.assertIsNotNone(pyarrow) + except LegacyPyarrowError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_try_import_returns_none_w_legacy_pyarrow(self): + with mock.patch("pyarrow.__version__", new="2.0.0"): + pyarrow = self._call_try_import() + self.assertIsNone(pyarrow) + + def test_try_import_raises_error_w_legacy_pyarrow(self): + from google.cloud.bigquery.exceptions import LegacyPyarrowError + + with mock.patch("pyarrow.__version__", new="2.0.0"): + with self.assertRaises(LegacyPyarrowError): + self._call_try_import(raise_if_error=True) + def test_installed_version_returns_cached(self): versions = self._object_under_test() versions._installed_version = object() diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 1a3f918eb..885cd318c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -30,9 +30,6 @@ except ImportError: # pragma: NO COVER pandas = None -import pyarrow -import pyarrow.types - try: import geopandas except ImportError: # pragma: NO COVER @@ -41,10 +38,28 @@ import pytest from google import api_core -from google.cloud import bigquery_storage + +from google.cloud.bigquery import exceptions from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +if pyarrow: + import pyarrow.parquet + import pyarrow.types +else: # pragma: NO COVER + # Mock out pyarrow when missing, because methods from pyarrow.types are + # used in test parameterization. 
+ pyarrow = mock.Mock() + +try: + from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() +except ImportError: # pragma: NO COVER + bigquery_storage = None PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -55,6 +70,12 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, + reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) + + @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -110,6 +131,7 @@ def all_(*functions): return functools.partial(do_all, functions) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_is_datetime(): assert is_datetime(pyarrow.timestamp("us", tz=None)) assert not is_datetime(pyarrow.timestamp("ms", tz=None)) @@ -142,7 +164,12 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - ("BIGNUMERIC", "NULLABLE", is_bignumeric), + pytest.param( + "BIGNUMERIC", + "NULLABLE", + is_bignumeric, + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -221,10 +248,11 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - ( + pytest.param( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -280,6 +308,7 @@ def test_all_(): ("UNKNOWN_TYPE", "REPEATED", is_none), ], ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type): field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode) actual = module_under_test.bq_to_arrow_data_type(field) @@ -287,6 +316,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -334,6 +364,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): fields = ( schema.SchemaField("field01", "STRING"), @@ -381,6 +412,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): assert actual.value_type.equals(expected_value_type) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -417,7 +449,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - ( + pytest.param( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -479,6 +511,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), 
reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): series = pandas.Series(rows, dtype="object") bq_field = schema.SchemaField("field_name", bq_type) @@ -513,6 +546,7 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows): ], ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): rows = [pandas.Timestamp(row) for row in rows] series = pandas.Series(rows) @@ -523,6 +557,7 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_arrays(module_under_test): rows = [[1, 2, 3], [], [4, 5, 6]] series = pandas.Series(rows, dtype="object") @@ -534,6 +569,7 @@ def test_bq_to_arrow_array_w_arrays(module_under_test): @pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"]) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): rows = [ {"int_col": 123, "string_col": "abc"}, @@ -555,6 +591,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_array_w_special_floats(module_under_test): bq_field = schema.SchemaField("field_name", "FLOAT64") rows = [float("-inf"), float("nan"), float("inf"), None] @@ -622,6 +659,7 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): assert array.to_pylist() == list(series) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( schema.SchemaField("field1", "STRING"), @@ -647,6 +685,7 @@ def test_get_column_or_index_not_found(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_get_column_or_index_with_multiindex_not_found(module_under_test): dataframe = pandas.DataFrame( {"column_name": [1, 2, 3, 4, 5, 6]}, @@ -984,6 +1023,7 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_required_fields(module_under_test): bq_schema = ( schema.SchemaField("field01", "STRING", mode="REQUIRED"), @@ -1040,6 +1080,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_unknown_type(module_under_test): bq_schema = ( schema.SchemaField("field00", "UNKNOWN_TYPE"), @@ -1072,6 +1113,7 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def 
test_dataframe_to_arrow_dict_sequence_schema(module_under_test): dict_schema = [ {"name": "field01", "type": "STRING", "mode": "REQUIRED"}, @@ -1093,6 +1135,19 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch): + mock_pyarrow_import = mock.Mock() + mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError( + "pyarrow not installed" + ) + monkeypatch.setattr(_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import) + + with pytest.raises(exceptions.LegacyPyarrowError): + module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_extra_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1104,6 +1159,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_w_missing_fields(module_under_test): with pytest.raises(ValueError) as exc_context: module_under_test.dataframe_to_parquet( @@ -1115,6 +1171,7 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_compression_method(module_under_test): bq_schema = (schema.SchemaField("field00", "STRING"),) dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) @@ -1134,6 +1191,34 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): + dataframe = pandas.DataFrame( + data=[ + {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, + {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)}, + ] + ) + + no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None) + + with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned: + detected_schema = module_under_test.dataframe_to_bq_schema( + dataframe, bq_schema=[] + ) + + assert detected_schema is None + + # a warning should also be issued + expected_warnings = [ + warning for warning in warned if "could not determine" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + msg = str(expected_warnings[0]) + assert "execution_date" in msg and "created_at" in msg + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1163,6 +1248,7 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1249,6 +1335,7 @@ def test__first_array_valid_no_arrays_with_valid_items(module_under_test): 
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1315,6 +1402,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_repeated_fields(module_under_test): dataframe = pandas.DataFrame( data=[ @@ -1427,6 +1515,7 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): assert "all_none_array" in warning_msg and "empty_array" in warning_msg +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): pandas = pytest.importorskip("pandas") @@ -1457,6 +1546,9 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage_stream_includes_read_session( monkeypatch, module_under_test ): @@ -1487,7 +1579,8 @@ def test__download_table_bqstorage_stream_includes_read_session( @pytest.mark.skipif( - not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", ) def test__download_table_bqstorage_stream_omits_read_session( @@ -1527,6 +1620,9 @@ def test__download_table_bqstorage_stream_omits_read_session( (7, {"max_queue_size": None}, 7, 0), # infinite queue size ], ) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__download_table_bqstorage( module_under_test, stream_count, @@ -1577,6 +1673,7 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1612,6 +1709,7 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test): assert col.to_pylist() == [2.2, 22.22, 222.222] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1646,6 +1744,7 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test): assert col.to_pylist() == ["2.2", "22.22", "222.222"] +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), @@ -1712,6 +1811,7 @@ def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test assert isinstance(dataframe, pandas.DataFrame) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_type_override(module_under_test): # When loading pandas data, we may need to override the type # decision based on data contents, because GEOGRAPHY data can be @@ -1744,6 +1844,7 @@ def test_bq_to_arrow_field_type_override(module_under_test): ), ], ) 
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata): assert ( module_under_test.bq_to_arrow_field( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f4552cda2..22f7286db 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pkg_resources @@ -53,15 +54,25 @@ msg = "Error importing from opentelemetry, is the installed version compatible?" raise ImportError(msg) from exc +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None + import google.api_core.exceptions from google.api_core import client_info import google.cloud._helpers from google.cloud import bigquery -from google.cloud import bigquery_storage + from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from google.cloud.bigquery import ParquetOptions +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") @@ -614,6 +625,9 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() @@ -636,6 +650,55 @@ def test_ensure_bqstorage_client_creating_new_instance(self): client_info=mock.sentinel.client_info, ) + def test_ensure_bqstorage_client_missing_dependency(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning + for warning in warned + if "not installed" in str(warning) + and "google-cloud-bigquery-storage" in str(warning) + ] + assert matching_warnings, "Missing dependency warning not raised." + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ensure_bqstorage_client_existing_client_check_passes(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -647,6 +710,29 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self): self.assertIs(bqstorage_client, mock_storage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference @@ -6833,6 +6919,7 @@ def test_load_table_from_file_w_invalid_job_config(self): assert "Expected an instance of LoadJobConfig" in err_msg @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6928,6 +7015,7 @@ def test_load_table_from_dataframe(self): assert "description" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_client_location(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -6972,6 +7060,7 @@ def test_load_table_from_dataframe_w_client_location(self): assert sent_config.source_format == job.SourceFormat.PARQUET @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7026,6 +7115,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7081,6 +7171,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_parquet_options_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from 
google.cloud.bigquery import job @@ -7132,6 +7223,7 @@ def test_load_table_from_dataframe_w_parquet_options_none(self): assert sent_config.parquet_options.enable_list_inference is True @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_none(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7191,6 +7283,7 @@ def test_load_table_from_dataframe_w_list_inference_none(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_list_inference_false(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7251,6 +7344,7 @@ def test_load_table_from_dataframe_w_list_inference_false(self): assert job_config.to_api_repr() == original_config_copy.to_api_repr() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self): from google.cloud.bigquery import job @@ -7270,6 +7364,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel assert "Got unexpected source_format:" in str(exc.value) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7370,6 +7465,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7429,6 +7525,7 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): assert sent_config.schema is None @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_index_and_auto_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7490,6 +7587,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): assert sent_schema == expected_sent_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_unknown_table(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES @@ -7528,6 +7626,7 @@ def test_load_table_from_dataframe_unknown_table(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nullable_int64_datatype(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7575,6 +7674,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", ) + # @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def 
test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7619,6 +7719,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_struct_fields(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7678,6 +7779,7 @@ def test_load_table_from_dataframe_struct_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7742,6 +7844,7 @@ def test_load_table_from_dataframe_array_fields(self): assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_array_fields_w_auto_schema(self): """Test that a DataFrame with array columns can be uploaded correctly. @@ -7804,6 +7907,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): assert sent_config.schema == expected_schema @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -7887,6 +7991,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_partial_schema_extra_types(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7923,6 +8028,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery import job from google.cloud.bigquery.schema import SchemaField @@ -7955,6 +8061,74 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): assert call_args.kwargs.get("parquet_compression") == "LZ4" @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) + to_parquet_patch = mock.patch.object( + dataframe, "to_parquet", wraps=dataframe.to_parquet + ) + + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: + with pytest.raises(ValueError): + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + 
location=self.LOCATION, + parquet_compression="gzip", + ) + + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. + ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, + self.TABLE_REF, + location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): """Test that a DataFrame with null columns can be uploaded if a BigQuery schema is specified. diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 7cc1f11c3..fae0c17e9 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -21,6 +21,11 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + import google.cloud._helpers from google.cloud.bigquery import query, table from google.cloud.bigquery.dbapi import _helpers @@ -210,6 +215,7 @@ def test_empty_iterable(self): result = _helpers.to_bq_table_rows(rows_iterable) self.assertEqual(list(result), []) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_non_empty_iterable(self): rows_iterable = [ dict( diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index e96ab55d7..67777f923 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -17,7 +17,10 @@ import mock -from google.cloud import bigquery_storage +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None class TestConnection(unittest.TestCase): @@ -37,6 +40,8 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): + # Assumption: bigquery_storage exists. It's the test's responsibility to + # not use this helper or skip itself if bqstorage is not installed.
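+        # create_autospec() mirrors the public surface of the real BigQueryReadClient,
+        # and the hand-built _transport / grpc_channel mocks below let the close()
+        # tests assert whether the gRPC channel was actually shut down.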
mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client._transport = mock.Mock(spec=["channel"]) mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) @@ -53,6 +58,9 @@ def test_ctor_wo_bqstorage_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, None) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -82,6 +90,9 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -97,6 +108,9 @@ def test_connect_w_client(self): self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -130,6 +144,9 @@ def test_raises_error_if_closed(self): ): getattr(connection, method)() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() @@ -152,6 +169,9 @@ def test_close_closes_all_created_bigquery_clients(self): self.assertTrue(client.close.called) self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() bqstorage_client = self._mock_bqstorage_client() diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index d672c0f6c..b550bbce0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -18,8 +18,17 @@ import pytest +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.api_core import exceptions -from google.cloud import bigquery_storage + +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None from tests.unit.helpers import _to_pyarrow @@ -269,6 +278,10 @@ def test_fetchall_w_row(self): self.assertEqual(len(rows), 1) self.assertEqual(rows[0], (1,)) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -322,6 +335,9 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -344,6 +360,9 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): # check the data returned self.assertEqual(rows, []) + @unittest.skipIf( + bigquery_storage is None, "Requires 
`google-cloud-bigquery-storage`" + ) def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table @@ -375,6 +394,10 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_no_arrow_compression(self): from google.cloud.bigquery import dbapi from google.cloud.bigquery import table diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index fdfb16d16..c0aa5d85e 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -76,6 +76,19 @@ def ipython_ns_cleanup(): del ip.user_ns[name] +@pytest.fixture(scope="session") +def missing_bq_storage(): + """Provide a patcher that can make the bigquery storage import to fail.""" + + def fail_if(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + return maybe_fail_import(predicate=fail_if) + + @pytest.fixture(scope="session") def missing_grpcio_lib(): """Provide a patcher that can make the gapic library import to fail.""" @@ -310,6 +323,9 @@ def test__make_bqstorage_client_false(): assert got is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -321,6 +337,53 @@ def test__make_bqstorage_client_true(): assert isinstance(got, bigquery_storage.BigQueryReadClient) +def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + with pytest.raises(ImportError) as exc_context, missing_bq_storage: + magics._make_bqstorage_client(test_client, True, {}) + + error_msg = str(exc_context.value) + assert "google-cloud-bigquery-storage" in error_msg + assert "pyarrow" in error_msg + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = mock.patch( + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
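+# Note: as in the client tests, an outdated google-cloud-bigquery-storage
+# install is simulated by patching BQ_STORAGE_VERSIONS.verify_version to raise
+# LegacyBigQueryStorageError; _make_bqstorage_client() is expected to warn and
+# return None rather than raise.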
+ + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): credentials_mock = mock.create_autospec( @@ -376,6 +439,9 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") @@ -538,9 +604,10 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): - pandas = pytest.importorskip("pandas") - ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") mock_credentials = mock.create_autospec( @@ -603,6 +670,9 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) def test_bigquery_magic_with_rest_client_requested(monkeypatch): pandas = pytest.importorskip("pandas") @@ -830,6 +900,9 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip = IPython.get_ipython() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f542c7523..a79b98881 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -22,18 +22,33 @@ import warnings import mock -import pyarrow -import pyarrow.types +import pkg_resources import pytest import google.api_core.exceptions +from test_utils.imports import maybe_fail_import from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.dataset import DatasetReference -from google.cloud import bigquery_storage -from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( - grpc as big_query_read_grpc_transport, -) +try: + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, + ) +except ImportError: # pragma: NO COVER + bigquery_storage = None + big_query_read_grpc_transport = None + +from google.cloud.bigquery import _helpers + +pyarrow = _helpers.PYARROW_VERSIONS.try_import() +PYARROW_VERSION = pkg_resources.parse_version("0.0.1") + +if pyarrow: + import pyarrow.types + + PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) try: import pandas @@ -52,7 +67,7 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None -from google.cloud.bigquery.dataset import DatasetReference +PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") def _mock_client(): @@ -1902,12 +1917,20 @@ def test_total_rows_eq_zero(self): row_iterator = self._make_one() self.assertEqual(row_iterator.total_rows, 0) + @mock.patch("google.cloud.bigquery.table.pyarrow", new=None) + def 
test_to_arrow_error_if_pyarrow_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_arrow() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): row_iterator = self._make_one() tbl = row_iterator.to_arrow() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 0) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): row_iterator = self._make_one() arrow_iter = row_iterator.to_arrow_iterable() @@ -2192,6 +2215,49 @@ def test__validate_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_iterable(self): from google.cloud.bigquery.schema import SchemaField @@ -2292,6 +2358,10 @@ def test_to_arrow_iterable(self): [[{"name": "Bepples Phlyntstone", "age": 0}, {"name": "Dino", "age": 4}]], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2367,6 +2437,7 @@ def test_to_arrow_iterable_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2448,6 +2519,7 @@ def test_to_arrow(self): ], ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_nulls(self): from google.cloud.bigquery.schema import SchemaField @@ -2480,6 +2552,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(names, ["Donkey", "Diddy", "Dixie", None]) self.assertEqual(ages, [32, 29, None, 111]) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_unknown_type(self): from google.cloud.bigquery.schema import SchemaField @@ -2522,6 +2595,7 @@ def test_to_arrow_w_unknown_type(self): warning = warned[0] self.assertTrue("sport" in str(warning)) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_w_empty_table(self): from google.cloud.bigquery.schema import SchemaField @@ -2560,6 +2634,10 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[0].name, "name") self.assertEqual(child_field.type.value_type[1].name, "age") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2600,6 +2678,10 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): ) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -2636,6 +2718,10 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2713,6 +2799,10 @@ def test_to_arrow_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2740,6 +2830,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField @@ -2755,17 +2846,25 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) - tbl = row_iterator.to_arrow(create_bqstorage_client=True) + def mock_verify_version(): + raise _helpers.LegacyBigQueryStorageError("no bqstorage") - # The client attempted to create a BQ Storage client, and even though - # that was not possible, results were still returned without errors. - mock_client._ensure_bqstorage_client.assert_called_once() + with mock.patch( + "google.cloud.bigquery._helpers.BQ_STORAGE_VERSIONS.verify_version", + mock_verify_version, + ): + tbl = row_iterator.to_arrow(create_bqstorage_client=True) + + mock_client._ensure_bqstorage_client.assert_not_called() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -2802,6 +2901,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[1].name, "colC") self.assertEqual(actual_table.schema[2].name, "colB") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @mock.patch("tqdm.notebook.tqdm") @@ -2936,6 +3036,10 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3100,6 +3204,7 @@ def test_to_dataframe(self): self.assertEqual(df.age.dtype.name, "Int64") @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3127,6 +3232,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): from google.cloud.bigquery.schema import SchemaField @@ -3575,6 +3681,9 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): 
mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3603,6 +3712,9 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3628,7 +3740,11 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table @@ -3650,6 +3766,10 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3701,6 +3821,10 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3777,6 +3901,10 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -3827,6 +3955,10 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): @@ -3902,6 +4034,10 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): from google.cloud.bigquery import schema from 
google.cloud.bigquery import table as mut @@ -4018,6 +4154,9 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): self.assertTrue(df.index.is_unique) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut @@ -4036,6 +4175,9 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): with pytest.raises(google.api_core.exceptions.Forbidden): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4053,6 +4195,9 @@ def test_to_dataframe_w_bqstorage_partition(self): with pytest.raises(ValueError): row_iterator.to_dataframe(bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4071,6 +4216,10 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut @@ -4773,6 +4922,9 @@ def test_set_expiration_w_none(self): assert time_partitioning._properties["expirationMs"] is None +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) @pytest.mark.parametrize( "table_path", ( diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 943baa326..5778467a5 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -16,12 +16,12 @@ import decimal from unittest import mock -import pyarrow import pytest from google.cloud import bigquery pandas = pytest.importorskip("pandas") +pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data"