Skip to content

Commit

Permalink
BUG: Fix HDFStore empty keys on native HDF5 file by adding keyword in…
Browse files Browse the repository at this point in the history
…clude (#32723)
  • Loading branch information
roberthdevries authored Jun 14, 2020
1 parent f984364 commit 4f625a2
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,7 @@ I/O
- Bug in :meth:`~DataFrame.to_excel` could not handle the column name `render` and was raising a ``KeyError`` (:issue:`34331`)
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
- :meth:`HDFStore.keys` now has an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)

Plotting
^^^^^^^^
Expand Down
27 changes: 25 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -580,16 +580,39 @@ def __enter__(self):
def __exit__(self, exc_type, exc_value, traceback):
    """
    Close the store when the ``with`` block is left.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the ``with`` block keeps propagating.
    """
    self.close()

def keys(self, include: str = "pandas") -> List[str]:
    """
    Return a list of keys corresponding to objects stored in HDFStore.

    Parameters
    ----------
    include : str, default 'pandas'
        When `include` equals 'pandas' return pandas objects.
        When `include` equals 'native' return native HDF5 Table objects.

        .. versionadded:: 1.1.0

    Returns
    -------
    list
        List of ABSOLUTE path-names (e.g. have the leading '/').

    Raises
    ------
    ValueError
        If `include` is neither 'pandas' nor 'native'.
    """
    if include == "pandas":
        return [n._v_pathname for n in self.groups()]

    if include == "native":
        # Only narrows the Optional handle for the type checker.
        assert self._handle is not None  # mypy
        # Walk the whole file from the root and keep only nodes whose
        # class name is "Table".
        return [
            n._v_pathname for n in self._handle.walk_nodes("/", classname="Table")
        ]

    raise ValueError(
        f"`include` should be either 'pandas' or 'native' but is '{include}'"
    )

def __iter__(self):
    """
    Return an iterator over the keys of the store.

    The keys are those returned by ``self.keys()`` called with no arguments.
    """
    return iter(self.keys())
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/io/pytables/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,40 @@ def create_h5_and_return_checksum(track_times):
# checksums are NOT same if track_time = True
assert checksum_0_tt_true != checksum_1_tt_true

def test_non_pandas_keys(self, setup_path):
    """
    Check that ``keys(include="native")`` lists tables written by PyTables.

    The file is created directly with PyTables, so it contains no pandas
    objects: the default 'pandas' listing must be empty while the 'native'
    listing must contain the three tables.
    """

    class Table1(tables.IsDescription):
        value1 = tables.Float32Col()

    class Table2(tables.IsDescription):
        value2 = tables.Float32Col()

    class Table3(tables.IsDescription):
        value3 = tables.Float32Col()

    with ensure_clean_path(setup_path) as path:
        # Write the native tables and close the file before reopening it
        # through HDFStore.
        with tables.open_file(path, mode="w") as h5file:
            group = h5file.create_group("/", "group")
            h5file.create_table(group, "table1", Table1, "Table 1")
            h5file.create_table(group, "table2", Table2, "Table 2")
            h5file.create_table(group, "table3", Table3, "Table 3")

        with HDFStore(path) as store:
            expected = {"/group/table1", "/group/table2", "/group/table3"}
            native_keys = store.keys(include="native")
            assert len(native_keys) == 3
            assert set(native_keys) == expected
            assert set(store.keys(include="pandas")) == set()
            # Each native table declares one column, so each must be
            # readable as a one-column frame.
            for name in expected:
                df = store.get(name)
                assert len(df.columns) == 1

def test_keys_illegal_include_keyword_value(self, setup_path):
    """Check that an unsupported ``include`` value raises ``ValueError``."""
    with ensure_clean_store(setup_path) as store:
        with pytest.raises(
            ValueError,
            match="`include` should be either 'pandas' or 'native' "
            "but is 'illegal'",
        ):
            store.keys(include="illegal")

def test_keys_ignore_hdf_softlink(self, setup_path):

# GH 20523
Expand Down

0 comments on commit 4f625a2

Please sign in to comment.