Skip to content

Commit

Permalink
Merge branch 'excel_parse_utils' of https://github.com/cancan101/pandas
Browse files Browse the repository at this point in the history
… into cancan101-excel_parse_utils

Conflicts:
	doc/source/release.rst
  • Loading branch information
jreback committed Aug 26, 2013
2 parents f2a7b9c + 9fcd30b commit 516f802
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ pandas 0.13
now returns a ``MultiIndex`` rather than an ``Index``. (:issue:`4039`)

- Infer and downcast dtype if ``downcast='infer'`` is passed to ``fillna/ffill/bfill`` (:issue:`4604`)
- Factored out ``excel_value_to_python_value`` from ``ExcelFile._parse_excel`` (:issue:`4589`)

**Internal Refactoring**

Expand Down
31 changes: 18 additions & 13 deletions pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,23 @@ def read_excel(path_or_buf, sheetname, kind=None, **kwds):
return ExcelFile(path_or_buf, kind=kind).parse(sheetname=sheetname,
kind=kind, **kwds)

def excel_value_to_python_value(value, typ, datemode):
    """
    Convert a raw xlrd cell value into a plain Python value.

    Parameters
    ----------
    value : object
        The cell's raw value as read by xlrd.
    typ : int
        The xlrd cell type code; compared against ``XL_CELL_DATE``,
        ``XL_CELL_ERROR`` and ``XL_CELL_BOOLEAN``.
    datemode : int
        Date mode handed to ``xlrd.xldate_as_tuple`` when decoding dates.

    Returns
    -------
    object
        * date cells -> ``datetime.datetime``, or ``datetime.time`` when
          the decoded year is below ``datetime.MINYEAR``
        * error cells -> ``np.nan``
        * boolean cells -> ``bool``
        * any other cell type -> ``value`` unchanged
    """
    from xlrd import (xldate_as_tuple, XL_CELL_DATE,
                      XL_CELL_ERROR, XL_CELL_BOOLEAN)

    if typ == XL_CELL_DATE:
        date_parts = xldate_as_tuple(value, datemode)
        # how to produce this first case?
        if date_parts[0] < datetime.MINYEAR:  # pragma: no cover
            # only the (hour, minute, second) tail of the tuple is usable
            return datetime.time(*date_parts[3:])
        return datetime.datetime(*date_parts)

    if typ == XL_CELL_ERROR:
        return np.nan

    if typ == XL_CELL_BOOLEAN:
        return bool(value)

    # every other cell type passes through untouched
    return value

class ExcelFile(object):
"""
Expand Down Expand Up @@ -174,8 +191,6 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
index_col=None, has_index_names=None, parse_cols=None,
parse_dates=False, date_parser=None, na_values=None,
thousands=None, chunksize=None, **kwds):
from xlrd import (xldate_as_tuple, XL_CELL_DATE,
XL_CELL_ERROR, XL_CELL_BOOLEAN)

datemode = self.book.datemode
if isinstance(sheetname, compat.string_types):
Expand All @@ -193,17 +208,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0,
should_parse[j] = self._should_parse(j, parse_cols)

if parse_cols is None or should_parse[j]:
if typ == XL_CELL_DATE:
dt = xldate_as_tuple(value, datemode)
# how to produce this first case?
if dt[0] < datetime.MINYEAR: # pragma: no cover
value = datetime.time(*dt[3:])
else:
value = datetime.datetime(*dt)
elif typ == XL_CELL_ERROR:
value = np.nan
elif typ == XL_CELL_BOOLEAN:
value = bool(value)
value = excel_value_to_python_value(value=value, typ=typ, datemode=datemode)
row.append(value)

data.append(row)
Expand Down
Binary file added pandas/io/tests/data/types.xls
Binary file not shown.
29 changes: 28 additions & 1 deletion pandas/io/tests/test_excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
import pandas.io.parsers as parsers
from pandas.io.parsers import (read_csv, read_table, read_fwf,
TextParser, TextFileReader)
from pandas.io.excel import ExcelFile, ExcelWriter, read_excel
from pandas.io.excel import (ExcelFile, ExcelWriter, read_excel,
excel_value_to_python_value)
from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
network,
Expand Down Expand Up @@ -260,6 +261,32 @@ def test_excel_table(self):
tm.assert_frame_equal(df4, df.ix[:-1])
tm.assert_frame_equal(df4, df5)

def test_excel_value_to_python_value(self):
    # Run a handful of cells from the 'types.xls' fixture through
    # excel_value_to_python_value and check the converted results.
    _skip_if_no_xlrd()

    pth = os.path.join(self.dirpath, 'types.xls')
    xls = ExcelFile(pth)
    book = xls.book
    sheet = book.sheet_by_index(0)

    def convert(rowx, colx):
        # Convert the cell at (rowx, colx) using the workbook's datemode.
        cell = sheet.cell(rowx, colx)
        return excel_value_to_python_value(value=cell.value,
                                           typ=cell.ctype,
                                           datemode=book.datemode)

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(convert(0, 0), 'date')
    self.assertEqual(convert(0, 1), datetime(year=2013, month=4, day=1))
    self.assertEqual(convert(1, 1), True)
    self.assertEqual(convert(2, 1), 1)
    self.assertEqual(convert(3, 1), 1.1)

    # We need an identity check here because the value is NaN, and NaN
    # never compares equal to itself.
    self.assertIs(convert(4, 1), np.nan)

def test_excel_read_buffer(self):
_skip_if_no_xlrd()
_skip_if_no_openpyxl()
Expand Down

0 comments on commit 516f802

Please sign in to comment.