Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-35040: [Python] Skip test_cast_timestamp_to_string on Windows because it requires tz database #35735

Merged
merged 4 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 23 additions & 21 deletions python/pyarrow/tests/test_compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.lib import ArrowNotImplementedError
from pyarrow.tests import util


all_array_types = [
('bool', [True, False, False, True, True]),
Expand Down Expand Up @@ -180,17 +182,19 @@ def test_option_class_equality():
pc.WeekOptions(week_starts_monday=True, count_from_zero=False,
first_week_is_fully_in_year=False),
]
# TODO: We should test on windows once ARROW-13168 is resolved.
# Timezone database is not available on Windows yet
if sys.platform != 'win32':
# Timezone database might not be installed on Windows
if sys.platform != "win32" or util.windows_has_tzdata():
options.append(pc.AssumeTimezoneOptions("Europe/Ljubljana"))

classes = {type(option) for option in options}

for cls in exported_option_classes:
# Timezone database is not available on Windows yet
if cls not in classes and sys.platform != 'win32' and \
cls != pc.AssumeTimezoneOptions:
# Timezone database might not be installed on Windows
if (
cls not in classes
and (sys.platform != "win32" or util.windows_has_tzdata())
and cls != pc.AssumeTimezoneOptions
):
try:
options.append(cls())
except TypeError:
Expand Down Expand Up @@ -1846,17 +1850,18 @@ def test_strptime():
assert got == pa.array([None, None, None], type=pa.timestamp('s'))


# TODO: We should test on windows once ARROW-13168 is resolved.
@pytest.mark.pandas
@pytest.mark.skipif(sys.platform == 'win32',
reason="Timezone database is not available on Windows yet")
@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
reason="Timezone database is not installed on Windows")
def test_strftime():
times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
timezones = ["CET", "UTC", "Europe/Ljubljana"]

formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H",
"%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x",
"%X", "%%", "%G", "%V", "%u"]
formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H", "%I",
"%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%%", "%G", "%V", "%u"]
if sys.platform != "win32":
# Locale-dependent formats don't match on Windows
formats.extend(["%c", "%x", "%X"])
Comment on lines +1862 to +1864
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error on Appveyor we got was:

    @pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
                        reason="Timezone database is not installed on Windows")
    def test_strftime():
        times = ["2018-03-10 09:00", "2038-01-31 12:23", None]
        timezones = ["CET", "UTC", "Europe/Ljubljana"]
    
        formats = ["%a", "%A", "%w", "%d", "%b", "%B", "%m", "%y", "%Y", "%H",
                   "%I", "%p", "%M", "%z", "%Z", "%j", "%U", "%W", "%c", "%x",
                   "%X", "%%", "%G", "%V", "%u"]
    
        for timezone in timezones:
            ts = pd.to_datetime(times).tz_localize(timezone)
            for unit in ["s", "ms", "us", "ns"]:
                tsa = pa.array(ts, type=pa.timestamp(unit, timezone))
                for fmt in formats:
                    options = pc.StrftimeOptions(fmt)
                    result = pc.strftime(tsa, options=options)
                    expected = pa.array(ts.strftime(fmt))
>                   assert result.equals(expected)
E                   assert False
E                    +  where False = <built-in method equals of pyarrow.lib.StringArray object at 0x0000023767338600>(<pyarrow.lib.StringArray object at 0x0000023767338830>\n[\n  "Sat Mar 10 09:00:00 2018",\n  "Sun Jan 31 12:23:00 2038",\n  null\n])
E                    +    where <built-in method equals of pyarrow.lib.StringArray object at 0x0000023767338600> = <pyarrow.lib.StringArray object at 0x0000023767338600>\n[\n  "03/10/18 09:00:00",\n  "01/31/38 12:23:00",\n  null\n].equals
pyarrow\tests\test_compute.py:1872: AssertionError

So it seems that we create a string like "Sat Mar 10 09:00:00 2018", but the Python version we compare against gives "03/10/18 09:00:00". According to the docs for %c, the former (our result) is actually correct.
But since this test checks that our results match Python's, I am just skipping the formats where the two don't match.


for timezone in timezones:
ts = pd.to_datetime(times).tz_localize(timezone)
Expand Down Expand Up @@ -2029,18 +2034,16 @@ def test_extract_datetime_components():
_check_datetime_components(timestamps)

# Test timezone aware timestamp array
if sys.platform == 'win32':
# TODO: We should test on windows once ARROW-13168 is resolved.
pytest.skip('Timezone database is not available on Windows yet')
if sys.platform == "win32" and not util.windows_has_tzdata():
pytest.skip('Timezone database is not installed on Windows')
else:
for timezone in timezones:
_check_datetime_components(timestamps, timezone)


# TODO: We should test on windows once ARROW-13168 is resolved.
@pytest.mark.pandas
@pytest.mark.skipif(sys.platform == 'win32',
reason="Timezone database is not available on Windows yet")
@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
reason="Timezone database is not installed on Windows")
def test_assume_timezone():
ts_type = pa.timestamp("ns")
timestamps = pd.to_datetime(["1970-01-01T00:00:59.123456789",
Expand Down Expand Up @@ -2235,9 +2238,8 @@ def _check_temporal_rounding(ts, values, unit):
np.testing.assert_array_equal(result, expected)


# TODO: We should test on windows once ARROW-13168 is resolved.
@pytest.mark.skipif(sys.platform == 'win32',
reason="Timezone database is not available on Windows yet")
@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
reason="Timezone database is not installed on Windows")
@pytest.mark.parametrize('unit', ("nanosecond", "microsecond", "millisecond",
"second", "minute", "hour", "day"))
@pytest.mark.pandas
Expand Down
4 changes: 4 additions & 0 deletions python/pyarrow/tests/test_scalars.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@
import decimal
import pickle
import pytest
import sys
import weakref

import numpy as np

import pyarrow as pa
import pyarrow.compute as pc
from pyarrow.tests import util


@pytest.mark.parametrize(['value', 'ty', 'klass'], [
Expand Down Expand Up @@ -304,6 +306,8 @@ def test_cast():
pa.scalar('foo').cast('int32')


@pytest.mark.skipif(sys.platform == "win32" and not util.windows_has_tzdata(),
reason="Timezone database is not installed on Windows")
def test_cast_timestamp_to_string():
# GH-35370
pytest.importorskip("pytz")
Expand Down
9 changes: 9 additions & 0 deletions python/pyarrow/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,3 +448,12 @@ def _configure_s3_limited_user(s3_server, policy):

except FileNotFoundError:
pytest.skip("Configuring limited s3 user failed")


def windows_has_tzdata():
    """
    Report whether the timezone database directory exists on Windows.

    The checked path is the default location where tz.cpp will look for
    the database (until that location is made configurable at run-time).

    Returns
    -------
    bool
        True if the expanded default path exists, False otherwise.
    """
    # %USERPROFILE% is substituted from the environment by expandvars.
    default_location = r"%USERPROFILE%\Downloads\tzdata"
    return os.path.exists(os.path.expandvars(default_location))