Skip to content

Commit

Permalink
BUG: pytables with non-nano dt64 (#55622)
Browse files (browse the repository at this point in the history)
* BUG: pytables with non-nano dt64

* GH ref

* fix whatsnew
  • Loading branch information
jbrockmendel authored Oct 23, 2023
1 parent f32c52d commit ea65f90
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 22 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,10 @@ I/O
- Bug in :func:`read_csv` with ``engine="pyarrow"`` where ``usecols`` wasn't working with a csv with no headers (:issue:`54459`)
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
- Bug in :func:`to_excel`, with ``OdsWriter`` (``ods`` files) writing boolean/string value (:issue:`54994`)
- Bug in :meth:`DataFrame.to_hdf` and :func:`read_hdf` where ``datetime64`` dtypes with non-nanosecond resolution failed to round-trip correctly (:issue:`55622`)
- Bug in :meth:`pandas.read_excel` with an ODS file without cached formatted cell for float values (:issue:`55219`)
- Bug where :meth:`DataFrame.to_json` would raise an ``OverflowError`` instead of a ``TypeError`` with unsupported NumPy types (:issue:`55403`)
-

Period
^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def stringify(value):

kind = ensure_decoded(self.kind)
meta = ensure_decoded(self.meta)
if kind in ("datetime64", "datetime"):
if kind == "datetime" or (kind and kind.startswith("datetime64")):
if isinstance(v, (int, float)):
v = stringify(v)
v = ensure_decoded(v)
Expand Down
38 changes: 24 additions & 14 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,6 @@ def convert(

val_kind = _ensure_decoded(self.kind)
values = _maybe_convert(values, val_kind, encoding, errors)

kwargs = {}
kwargs["name"] = _ensure_decoded(self.index_name)

Expand Down Expand Up @@ -2577,7 +2576,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
dtype = _ensure_decoded(dtype_name)

# reverse converts
if dtype == "datetime64":
if dtype.startswith("datetime64"):
# recreate with tz if indicated
converted = _set_tz(converted, tz, coerce=True)

Expand Down Expand Up @@ -2870,7 +2869,9 @@ def _get_index_factory(self, attrs):

def f(values, freq=None, tz=None):
# data are already in UTC, localize and convert if tz present
dta = DatetimeArray._simple_new(values.values, freq=freq)
dta = DatetimeArray._simple_new(
values.values, dtype=values.dtype, freq=freq
)
result = DatetimeIndex._simple_new(dta, name=None)
if tz is not None:
result = result.tz_localize("UTC").tz_convert(tz)
Expand Down Expand Up @@ -2961,7 +2962,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
else:
ret = node[start:stop]

if dtype == "datetime64":
if dtype and dtype.startswith("datetime64"):
# reconstruct a timezone if indicated
tz = getattr(attrs, "tz", None)
ret = _set_tz(ret, tz, coerce=True)
Expand Down Expand Up @@ -3170,7 +3171,7 @@ def write_array(

elif lib.is_np_dtype(value.dtype, "M"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "datetime64"
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
elif isinstance(value.dtype, DatetimeTZDtype):
# store as UTC
# with a zone
Expand All @@ -3185,7 +3186,7 @@ def write_array(
# error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no
# attribute "tz"
node._v_attrs.tz = _get_tz(value.tz) # type: ignore[union-attr]
node._v_attrs.value_type = "datetime64"
node._v_attrs.value_type = f"datetime64[{value.dtype.unit}]"
elif lib.is_np_dtype(value.dtype, "m"):
self._handle.create_array(self.group, key, value.view("i8"))
getattr(self.group, key)._v_attrs.value_type = "timedelta64"
Expand Down Expand Up @@ -4689,7 +4690,6 @@ def read(
selection = Selection(self, where=where, start=start, stop=stop)
# apply the selection filters & axis orderings
df = self.process_axes(df, selection=selection, columns=columns)

return df


Expand Down Expand Up @@ -4932,11 +4932,12 @@ def _set_tz(
# call below (which returns an ndarray). So we are only non-lossy
# if `tz` matches `values.tz`.
assert values.tz is None or values.tz == tz
if values.tz is not None:
return values

if tz is not None:
if isinstance(values, DatetimeIndex):
name = values.name
values = values.asi8
else:
name = None
values = values.ravel()
Expand Down Expand Up @@ -5019,8 +5020,12 @@ def _convert_index(name: str, index: Index, encoding: str, errors: str) -> Index
def _unconvert_index(data, kind: str, encoding: str, errors: str) -> np.ndarray | Index:
index: Index | np.ndarray

if kind == "datetime64":
index = DatetimeIndex(data)
if kind.startswith("datetime64"):
if kind == "datetime64":
# created before we stored resolution information
index = DatetimeIndex(data)
else:
index = DatetimeIndex(data.view(kind))
elif kind == "timedelta64":
index = TimedeltaIndex(data)
elif kind == "date":
Expand Down Expand Up @@ -5194,6 +5199,8 @@ def _maybe_convert(values: np.ndarray, val_kind: str, encoding: str, errors: str
def _get_converter(kind: str, encoding: str, errors: str):
if kind == "datetime64":
return lambda x: np.asarray(x, dtype="M8[ns]")
elif "datetime64" in kind:
return lambda x: np.asarray(x, dtype=kind)
elif kind == "string":
return lambda x: _unconvert_string_array(
x, nan_rep=None, encoding=encoding, errors=errors
Expand All @@ -5203,7 +5210,7 @@ def _get_converter(kind: str, encoding: str, errors: str):


def _need_convert(kind: str) -> bool:
if kind in ("datetime64", "string"):
if kind in ("datetime64", "string") or "datetime64" in kind:
return True
return False

Expand Down Expand Up @@ -5248,7 +5255,7 @@ def _dtype_to_kind(dtype_str: str) -> str:
elif dtype_str.startswith(("int", "uint")):
kind = "integer"
elif dtype_str.startswith("datetime64"):
kind = "datetime64"
kind = dtype_str
elif dtype_str.startswith("timedelta"):
kind = "timedelta64"
elif dtype_str.startswith("bool"):
Expand All @@ -5273,8 +5280,11 @@ def _get_data_and_dtype_name(data: ArrayLike):
if isinstance(data, Categorical):
data = data.codes

# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = data.dtype.name.split("[")[0]
if isinstance(data.dtype, DatetimeTZDtype):
# For datetime64tz we need to drop the TZ in tests TODO: why?
dtype_name = f"datetime64[{data.dtype.unit}]"
else:
dtype_name = data.dtype.name

if data.dtype.kind in "mM":
data = np.asarray(data.view("i8"))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@ def test_append_raise(setup_path):
"dtype->bytes24,kind->string,shape->(1, 30)] "
"vs current table "
"[name->values_block_1,cname->values_block_1,"
"dtype->datetime64,kind->datetime64,shape->None]"
"dtype->datetime64[s],kind->datetime64[s],shape->None]"
)
with pytest.raises(ValueError, match=msg):
store.append("df", df)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/pytables/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def test_table_index_incompatible_dtypes(setup_path):

with ensure_clean_store(setup_path) as store:
store.put("frame", df1, format="table")
msg = re.escape("incompatible kind in col [integer - datetime64]")
msg = re.escape("incompatible kind in col [integer - datetime64[ns]]")
with pytest.raises(TypeError, match=msg):
store.put("frame", df2, format="table", append=True)

Expand Down
16 changes: 11 additions & 5 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,16 +541,22 @@ def test_store_index_name(setup_path):
tm.assert_frame_equal(recons, df)


@pytest.mark.parametrize("tz", [None, "US/Pacific"])
@pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
@pytest.mark.parametrize("table_format", ["table", "fixed"])
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path):
def test_store_index_name_numpy_str(tmp_path, table_format, setup_path, unit, tz):
# GH #13492
idx = Index(
pd.to_datetime([dt.date(2000, 1, 1), dt.date(2000, 1, 2)]),
name="cols\u05d2",
)
idx1 = Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
).tz_localize(tz)
idx1 = (
Index(
pd.to_datetime([dt.date(2010, 1, 1), dt.date(2010, 1, 2)]),
name="rows\u05d0",
)
.as_unit(unit)
.tz_localize(tz)
)
df = DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)

Expand Down

0 comments on commit ea65f90

Please sign in to comment.