Skip to content

Commit

Permalink
apacheGH-35600: [Python] Allow setting path to timezone db through py…
Browse files Browse the repository at this point in the history
…thon API (apache#37436)

### Rationale for this change

Add a function to set the path where the timezone database is looked up, as a thin wrapper around the C++ `GlobalOptions` setting.

### What changes are included in this PR?

New function `set_timezone_db_path`.

### Are these changes tested?

### Are there any user-facing changes?

No.
* Closes: apache#35600

Lead-authored-by: AlenkaF <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Alenka Frim <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
  • Loading branch information
3 people authored and loicalleyne committed Nov 13, 2023
1 parent 2cf2a82 commit 13ba225
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 6 deletions.
13 changes: 13 additions & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,19 @@ set ARROW_HOME=%CONDA_PREFIX%\Library
@rem ARROW-3075; pkgconfig is broken for Parquet for now
set PARQUET_HOME=%CONDA_PREFIX%\Library

@rem Download IANA Timezone Database to a non-standard location to
@rem test the configurability of the timezone database path
curl https://data.iana.org/time-zones/releases/tzdata2021e.tar.gz --output tzdata.tar.gz || exit /B
mkdir %USERPROFILE%\Downloads\test\tzdata
@rem Abort the build if extraction fails (same handling as the downloads),
@rem so the tests never run against a missing or partial database
tar --extract --file tzdata.tar.gz --directory %USERPROFILE%\Downloads\test\tzdata || exit /B
@rem Fetch windowsZones.xml from the Unicode CLDR repository into the
@rem same directory as the extracted database
curl https://raw.githubusercontent.com/unicode-org/cldr/master/common/supplemental/windowsZones.xml ^
--output %USERPROFILE%\Downloads\test\tzdata\windowsZones.xml || exit /B
@rem Remove the database from the default location
rmdir /s /q %USERPROFILE%\Downloads\tzdata
@rem Set the env var for the non-standard location of the database
@rem (only needed for testing purposes)
set PYARROW_TZDATA_PATH=%USERPROFILE%\Downloads\test\tzdata

python setup.py develop -q || exit /B

set PYTHONDEVMODE=1
Expand Down
8 changes: 4 additions & 4 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ def parse_git(root, **kwargs):
if _gc_enabled:
_gc.enable()

from pyarrow.lib import (BuildInfo, RuntimeInfo, MonthDayNano,
VersionInfo, cpp_build_info, cpp_version,
cpp_version_info, runtime_info, cpu_count,
set_cpu_count, enable_signal_handlers,
from pyarrow.lib import (BuildInfo, RuntimeInfo, set_timezone_db_path,
MonthDayNano, VersionInfo, cpp_build_info,
cpp_version, cpp_version_info, runtime_info,
cpu_count, set_cpu_count, enable_signal_handlers,
io_thread_count, set_io_thread_count)


Expand Down
19 changes: 19 additions & 0 deletions python/pyarrow/config.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pyarrow.includes.libarrow cimport GetBuildInfo

from collections import namedtuple
import os


VersionInfo = namedtuple('VersionInfo', ('major', 'minor', 'patch'))
Expand Down Expand Up @@ -74,3 +75,21 @@ def runtime_info():
return RuntimeInfo(
simd_level=frombytes(c_info.simd_level),
detected_simd_level=frombytes(c_info.detected_simd_level))


def set_timezone_db_path(path):
    """
    Configure the path to text timezone database on Windows.

    The path is stored in a ``GlobalOptions`` object which is then passed
    to the C++ ``Initialize`` function; a failing status is raised as a
    Python exception.

    Parameters
    ----------
    path : str
        Path to text timezone database. If None, ``Initialize`` is still
        called, with the timezone database path left unset.
    """
    cdef CGlobalOptions c_options

    # Only override the option when a path was actually supplied.
    if path is not None:
        c_options.timezone_db_path = <c_string>tobytes(path)

    check_status(Initialize(c_options))
5 changes: 5 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ cdef extern from "arrow/config.h" namespace "arrow" nogil:

CRuntimeInfo GetRuntimeInfo()

# Cython view of the C++ type arrow::GlobalOptions.
cdef cppclass CGlobalOptions" arrow::GlobalOptions":
    # Optional path to the timezone database.
    optional[c_string] timezone_db_path

# Takes a GlobalOptions instance and returns a status to be checked by the caller.
CStatus Initialize(const CGlobalOptions& options)


cdef extern from "arrow/util/future.h" namespace "arrow" nogil:
cdef cppclass CFuture_Void" arrow::Future<>":
Expand Down
7 changes: 7 additions & 0 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import hypothesis as h
from ..conftest import groups, defaults

from pyarrow import set_timezone_db_path
from pyarrow.util import find_free_port


Expand All @@ -48,6 +49,12 @@
os.environ['AWS_CONFIG_FILE'] = "/dev/null"


# On Windows, point Arrow at the timezone database named by the
# PYARROW_TZDATA_PATH environment variable, when it is set and non-empty.
if sys.platform == 'win32':
    tzdata_set_path = os.environ.get('PYARROW_TZDATA_PATH')
    if tzdata_set_path:
        set_timezone_db_path(tzdata_set_path)


def pytest_addoption(parser):
# Create options to selectively enable test groups
def bool_env(name, default=None):
Expand Down
12 changes: 12 additions & 0 deletions python/pyarrow/tests/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pytest

import pyarrow as pa
from pyarrow.lib import ArrowInvalid


def test_get_include():
Expand Down Expand Up @@ -116,6 +117,17 @@ def test_runtime_info():
subprocess.check_call([sys.executable, "-c", code], env=env)


@pytest.mark.skipif(sys.platform == "win32",
                    reason="Test checks the error raised on non-Windows "
                           "platforms, where the path to the timezone "
                           "database is not configurable")
def test_set_timezone_db_path_non_windows():
    """Check that setting the timezone database path fails off Windows."""
    # set_timezone_db_path raises an error on non-Windows platforms
    with pytest.raises(ArrowInvalid,
                       match="Arrow was set to use OS timezone "
                             "database at compile time"):
        pa.set_timezone_db_path("path")


@pytest.mark.parametrize('klass', [
pa.Field,
pa.Schema,
Expand Down
10 changes: 8 additions & 2 deletions python/pyarrow/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,5 +455,11 @@ def windows_has_tzdata():
This is the default location where tz.cpp will look for (until we make
this configurable at run-time)
"""
tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
return os.path.exists(tzdata_path)
tzdata_bool = False
if "PYARROW_TZDATA_PATH" in os.environ:
tzdata_bool = os.path.exists(os.environ['PYARROW_TZDATA_PATH'])
if not tzdata_bool:
tzdata_path = os.path.expandvars(r"%USERPROFILE%\Downloads\tzdata")
tzdata_bool = os.path.exists(tzdata_path)

return tzdata_bool

0 comments on commit 13ba225

Please sign in to comment.