Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove pytz dependency and require pyarrow>=3.0.0 #875

Merged
merged 7 commits into from
Aug 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions docs/snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete):

# [START bigquery_update_table_expiration]
import datetime
import pytz

# from google.cloud import bigquery
# client = bigquery.Client()
Expand All @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete):
assert table.expires is None

# set table to expire 5 days from now
expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5)
expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
days=5
)
table.expires = expiration
table = client.update_table(table, ["expires"]) # API request

Expand Down
17 changes: 4 additions & 13 deletions google/cloud/bigquery/_pandas_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
import queue
import warnings

from packaging import version

try:
import pandas
except ImportError: # pragma: NO COVER
Expand Down Expand Up @@ -110,6 +108,7 @@ def pyarrow_timestamp():
# This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
# When modifying it, be sure to update it there as well.
BQ_TO_ARROW_SCALARS = {
"BIGNUMERIC": pyarrow_bignumeric,
"BOOL": pyarrow.bool_,
"BOOLEAN": pyarrow.bool_,
"BYTES": pyarrow.binary,
Expand Down Expand Up @@ -146,23 +145,15 @@ def pyarrow_timestamp():
pyarrow.date64().id: "DATETIME", # because millisecond resolution
pyarrow.binary().id: "BYTES",
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
# The exact scale and precision don't matter, see below.
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
}

if version.parse(pyarrow.__version__) >= version.parse("3.0.0"):
BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
# The exact decimal's scale and precision are not important, as only
# the type ID matters, and it's the same for all decimal256 instances.
ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
_BIGNUMERIC_SUPPORT = True
else:
_BIGNUMERIC_SUPPORT = False
pyarrow.decimal128(38, scale=9).id: "NUMERIC",
pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

W00t!


else: # pragma: NO COVER
BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER
ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO COVER
_BIGNUMERIC_SUPPORT = False # pragma: NO COVER


def bq_to_arrow_struct_data_type(field):
Expand Down
3 changes: 1 addition & 2 deletions google/cloud/bigquery/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import datetime
import functools
import operator
import pytz
import typing
from typing import Any, Dict, Iterable, Iterator, Optional, Tuple
import warnings
Expand Down Expand Up @@ -1969,7 +1968,7 @@ def to_dataframe(
# Pandas, we set the timestamp_as_object parameter to True, if necessary.
types_to_check = {
pyarrow.timestamp("us"),
pyarrow.timestamp("us", tz=pytz.UTC),
pyarrow.timestamp("us", tz=datetime.timezone.utc),
}

for column in record_batch:
Expand Down
3 changes: 1 addition & 2 deletions samples/client_query_w_timestamp_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def client_query_w_timestamp_params():
# [START bigquery_query_params_timestamps]
import datetime

import pytz
from google.cloud import bigquery

# Construct a BigQuery client object.
Expand All @@ -30,7 +29,7 @@ def client_query_w_timestamp_params():
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc),
)
]
)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@
# grpc.Channel.close() method isn't added until 1.32.0.
# https://github.com/grpc/grpc/pull/15254
"grpcio >= 1.38.1, < 2.0dev",
"pyarrow >= 1.0.0, < 6.0dev",
"pyarrow >= 3.0.0, < 6.0dev",
],
"pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"],
"pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"],
"bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"],
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
"opentelemetry": [
Expand Down
2 changes: 1 addition & 1 deletion testing/constraints-3.6.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0
pandas==0.23.0
proto-plus==1.10.0
protobuf==3.12.0
pyarrow==1.0.0
pyarrow==3.0.0
requests==2.18.0
six==1.13.0
tqdm==4.7.4
14 changes: 5 additions & 9 deletions tests/system/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import psutil
import pytest

from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
from . import helpers

try:
Expand Down Expand Up @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self):
"expected": {"friends": [phred_name, bharney_name]},
"query_parameters": [with_friends_param],
},
{
"sql": "SELECT @bignum_param",
"expected": bignum,
"query_parameters": [bignum_param],
},
]
if _BIGNUMERIC_SUPPORT:
examples.append(
{
"sql": "SELECT @bignum_param",
"expected": bignum,
"query_parameters": [bignum_param],
}
)

for example in examples:
jconfig = QueryJobConfig()
Expand Down
42 changes: 18 additions & 24 deletions tests/system/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@
import google.api_core.retry
import pkg_resources
import pytest
import pytz

from google.cloud import bigquery
from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT
from . import helpers


Expand Down Expand Up @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
datetime.datetime(2012, 3, 14, 15, 16),
],
dtype="datetime64[ns]",
).dt.tz_localize(pytz.utc),
).dt.tz_localize(datetime.timezone.utc),
),
(
"dt_col",
Expand Down Expand Up @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)
if _BIGNUMERIC_SUPPORT:
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

table_schema = scalars_schema + (
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
Expand All @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
("geo_col", nulls),
("int_col", nulls),
("num_col", nulls),
("bignum_col", nulls),
("str_col", nulls),
("time_col", nulls),
("ts_col", nulls),
]
if _BIGNUMERIC_SUPPORT:
df_data.append(("bignum_col", nulls))
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())

Expand Down Expand Up @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
)
if _BIGNUMERIC_SUPPORT:
scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),)

table_schema = scalars_schema + (
# TODO: Array columns can't be read due to NULLABLE versus REPEATED
Expand Down Expand Up @@ -340,6 +335,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
decimal.Decimal("99999999999999999999999999999.999999999"),
],
),
(
"bignum_col",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
),
("str_col", ["abc", None, "def"]),
(
"time_col",
Expand All @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
(
"ts_col",
[
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
datetime.datetime(
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
),
],
),
]
if _BIGNUMERIC_SUPPORT:
df_data.append(
(
"bignum_col",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
)
)
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())

Expand Down Expand Up @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(
(
"ts_col",
[
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
datetime.datetime(
9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
),
],
),
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/job/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,11 +295,11 @@ def test_user_email(self):
@staticmethod
def _datetime_and_millis():
import datetime
import pytz
from google.cloud._helpers import _millis

now = datetime.datetime.utcnow().replace(
microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision
microsecond=123000,
tzinfo=datetime.timezone.utc, # stats timestamps have ms precision
)
return now, _millis(now)

Expand Down
Loading