Skip to content

Commit

Permalink
Fix reading Series with read_hdf (pandas-dev#16610)
Browse files Browse the repository at this point in the history
* Added test to reproduce issue pandas-dev#16583

* Fix pandas-dev#16583 by adding an explicit `mode` argument to `read_hdf`

kwargs which are meant for the opening of the HDFStore should be filtered out
before passing the remaining kwargs to the `select` function to load the data.

* Noted fix for pandas-dev#16583 in WhatsNew

(cherry picked from commit 196eb8e)
  • Loading branch information
frexvahi authored and TomAugspurger committed Jul 6, 2017
1 parent 9365d1f commit d8bccc4
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.3.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ I/O
^^^

-- Bug in ``pd.read_csv()`` in which files weren't opened as binary files by the C engine on Windows, causing EOF characters mid-field, which would fail (:issue:`16039`, :issue:`16559`, :issue:`16675`)
-- Bug in ``pd.read_hdf()`` in which reading a ``Series`` saved to an HDF file in 'fixed' format fails when an explicit ``mode='r'`` argument is supplied (:issue:`16583`)

Plotting
^^^^^^^^
Expand Down
16 changes: 9 additions & 7 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,21 +282,24 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None,
f(path_or_buf)


def read_hdf(path_or_buf, key=None, **kwargs):
def read_hdf(path_or_buf, key=None, mode='r', **kwargs):
""" read from the store, close it if we opened it
Retrieve pandas object stored in file, optionally based on where
criteria
Parameters
----------
path_or_buf : path (string), buffer, or path object (pathlib.Path or
py._path.local.LocalPath) to read from
path_or_buf : path (string), buffer or path object (pathlib.Path or
py._path.local.LocalPath) designating the file to open, or an
already opened pd.HDFStore object
.. versionadded:: 0.19.0 support for pathlib, py.path.
key : group identifier in the store. Can be omitted if the HDF file
contains a single pandas object.
mode : string, {'r', 'r+', 'a'}, default 'r'. Mode to use when opening
the file. Ignored if path_or_buf is a pd.HDFStore.
where : list of Term (or convertible) objects, optional
start : optional, integer (defaults to None), row number to start
selection
Expand All @@ -313,10 +316,9 @@ def read_hdf(path_or_buf, key=None, **kwargs):
"""

if kwargs.get('mode', 'a') not in ['r', 'r+', 'a']:
if mode not in ['r', 'r+', 'a']:
raise ValueError('mode {0} is not allowed while performing a read. '
'Allowed modes are r, r+ and a.'
.format(kwargs.get('mode')))
'Allowed modes are r, r+ and a.'.format(mode))
# grab the scope
if 'where' in kwargs:
kwargs['where'] = _ensure_term(kwargs['where'], scope_level=1)
Expand All @@ -335,9 +337,9 @@ def read_hdf(path_or_buf, key=None, **kwargs):
raise compat.FileNotFoundError(
'File %s does not exist' % path_or_buf)

store = HDFStore(path_or_buf, mode=mode, **kwargs)
# can't auto open/close if we are using an iterator
# so delegate to the iterator
store = HDFStore(path_or_buf, **kwargs)
auto_close = True

elif isinstance(path_or_buf, HDFStore):
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/io/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5190,6 +5190,23 @@ def test_query_compare_column_type(self):
expected = df.loc[[], :]
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize('format', ['fixed', 'table'])
def test_read_hdf_series_mode_r(self, format):
    # GH 16583
    # Round-trip a Series through an HDF file and make sure it can be
    # read back when the caller passes mode='r' explicitly to read_hdf.
    expected = tm.makeFloatSeries()
    with ensure_clean_path(self.path) as hdf_path:
        # write with the parametrized storage format ('fixed' or 'table')
        expected.to_hdf(hdf_path, key='data', format=format)
        roundtripped = pd.read_hdf(hdf_path, key='data', mode='r')
    tm.assert_series_equal(roundtripped, expected)

@pytest.mark.skipif(sys.version_info < (3, 6), reason="Need python 3.6")
def test_fspath(self):
    # An open HDFStore should report, via os.fspath, the same path it
    # was opened with. Skipped on interpreters older than Python 3.6.
    with tm.ensure_clean('foo.h5') as h5_path, \
            pd.HDFStore(h5_path) as store:
        assert os.fspath(store) == str(h5_path)


class TestHDFComplexValues(Base):
# GH10447
Expand Down

0 comments on commit d8bccc4

Please sign in to comment.