From 4e781247f4264123bcef02e2ace5d95a7605ab9c Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Fri, 2 Feb 2018 08:07:16 -0800 Subject: [PATCH 1/2] BUGFIX - AttributeError raised in StataReader.value_labels() --- doc/source/whatsnew/v0.23.0.txt | 2 ++ pandas/io/stata.py | 8 +++++--- pandas/tests/io/test_stata.py | 9 +++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 26a7a78bb5c55..cd0f1377d4de3 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -530,6 +530,8 @@ I/O - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`) +- Bug in :class:`pandas.io.stata.StataReader` raising ``AttributeError`` when ``value_labels()`` called with very old files. Now returns an empty dict (:issue:`19417`) + Plotting ^^^^^^^^ diff --git a/pandas/io/stata.py b/pandas/io/stata.py index adbff06364dbe..ee6975ea1d938 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1341,12 +1341,14 @@ def _null_terminate(self, s): return s def _read_value_labels(self): - if self.format_version <= 108: - # Value labels are not supported in version 108 and earlier. - return if self._value_labels_read: # Don't read twice return + if self.format_version <= 108: + # Value labels are not supported in version 108 and earlier. + self._value_labels_read = True + self.value_label_dict = dict() + return if self.format_version >= 117: self.path_or_buf.seek(self.seek_value_labels) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 89d76061329a3..e2ad731eb4ec5 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -589,6 +589,15 @@ def test_105(self): df0['psch_dis'] = df0["psch_dis"].astype(np.float32) tm.assert_frame_equal(df.head(3), df0) + def test_value_labels_old_format(self): + # GH 19417 + # + # Test that value_labels() returns an empty dict if the file format + # predates supporting value labels. + dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta') + reader = StataReader(dpath) + assert reader.value_labels() == {} + def test_date_export_formats(self): columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty'] conversions = {c: c for c in columns} From 69878878fc478524d034d08ec58f2d99c4a40890 Mon Sep 17 00:00:00 2001 From: Mike Richards Date: Mon, 5 Feb 2018 07:48:26 -0800 Subject: [PATCH 2/2] Address PR comments --- doc/source/whatsnew/v0.23.0.txt | 3 +-- pandas/tests/io/test_stata.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index cd0f1377d4de3..459bef04edb79 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -530,8 +530,7 @@ I/O - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`) -- Bug in :class:`pandas.io.stata.StataReader` raising ``AttributeError`` when ``value_labels()`` called with very old files. Now returns an empty dict (:issue:`19417`) - +- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`) Plotting ^^^^^^^^ diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index e2ad731eb4ec5..4e259d0994bdb 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -597,6 +597,7 @@ def test_value_labels_old_format(self): dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta') reader = StataReader(dpath) assert reader.value_labels() == {} + reader.close() def test_date_export_formats(self): columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']