Summary: StataReader._read_value_labels now checks the "already read" flag
first and, for format_version <= 108, marks the labels as read and sets
value_label_dict to an empty dict before returning (GH 19417). A whatsnew
entry and a regression test using S4_EDUC1.dta are added.

NOTE(review): the line breaks and indentation of this patch were restored
from a whitespace-collapsed copy. The indentation of context lines follows
Python structure and should be verified against the target tree before
applying with strict whitespace checking.

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index ea56ebad7d782..3109798499135 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -626,6 +626,7 @@ I/O
 - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`)
 - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`)
 - :class:`Timedelta` now supported in :func:`DataFrame.to_excel` for xls file type (:issue:`19242`, :issue:`9155`)
+- Bug in :meth:`pandas.io.stata.StataReader.value_labels` raising an ``AttributeError`` when called on very old files. Now returns an empty dict (:issue:`19417`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index adbff06364dbe..ee6975ea1d938 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -1341,12 +1341,14 @@ def _null_terminate(self, s):
                 return s
 
     def _read_value_labels(self):
-        if self.format_version <= 108:
-            # Value labels are not supported in version 108 and earlier.
-            return
         if self._value_labels_read:
             # Don't read twice
             return
+        if self.format_version <= 108:
+            # Value labels are not supported in version 108 and earlier.
+            self._value_labels_read = True
+            self.value_label_dict = dict()
+            return
 
         if self.format_version >= 117:
             self.path_or_buf.seek(self.seek_value_labels)
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index 89d76061329a3..4e259d0994bdb 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -589,6 +589,16 @@ def test_105(self):
         df0['psch_dis'] = df0["psch_dis"].astype(np.float32)
         tm.assert_frame_equal(df.head(3), df0)
 
+    def test_value_labels_old_format(self):
+        # GH 19417
+        #
+        # Test that value_labels() returns an empty dict if the file format
+        # predates supporting value labels.
+        dpath = os.path.join(self.dirpath, 'S4_EDUC1.dta')
+        reader = StataReader(dpath)
+        assert reader.value_labels() == {}
+        reader.close()
+
     def test_date_export_formats(self):
         columns = ['tc', 'td', 'tw', 'tm', 'tq', 'th', 'ty']
         conversions = {c: c for c in columns}