diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 83beec5607986f..3b6288146bdf2e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,6 +97,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fbe413f820c901..1ff3400323e54a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None): kwargs["freq"] = node._v_attrs["freq"] if "tz" in node._v_attrs: - kwargs["tz"] = node._v_attrs["tz"] + if isinstance(node._v_attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = node._v_attrs["tz"] if kind in ("date", "datetime"): index = factory( diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5 new file mode 100644 index 00000000000000..45aa64324530f9 Binary files /dev/null and b/pandas/tests/io/data/legacy_hdf/gh26443.h5 differ diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 7306393a1339ee..77cac00882771f 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -5447,3 +5447,16 @@ def test_read_with_where_tz_aware_index(self): store.append(key, expected, format="table", append=True) result = pd.read_hdf(path, key, where="DATE > 20151130") assert_frame_equal(result, expected) + + def test_py2_created_with_datetimez(self, datapath): + # The test HDF5 file was created in Python 2, but could not be read in + # Python 3. + # + # GH26443 + index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + assert_frame_equal(result, expected)