diff --git a/FlowCal/excel_ui.py b/FlowCal/excel_ui.py index 3408760..fb01990 100644 --- a/FlowCal/excel_ui.py +++ b/FlowCal/excel_ui.py @@ -96,6 +96,10 @@ from matplotlib import pyplot as plt import numpy as np import pandas as pd +try: + import openpyxl +except ImportError: + pass import FlowCal.io import FlowCal.plot @@ -115,7 +119,7 @@ class ExcelUIException(Exception): """ pass -def read_table(filename, sheetname, index_col=None): +def read_table(filename, sheetname, index_col=None, engine=None): """ Return the contents of an Excel table as a pandas DataFrame. @@ -128,6 +132,9 @@ def read_table(filename, sheetname, index_col=None): index_col : str, optional Column name or index to be used as row labels of the DataFrame. If None, default index will be used. + engine : str, optional + Engine used by `pd.read_excel()` to read Excel file. If None, try + 'openpyxl' then 'xlrd'. Returns ------- @@ -150,17 +157,53 @@ def read_table(filename, sheetname, index_col=None): raise TypeError("sheetname should specify a single sheet") # Load excel table using pandas - # Parameter specifying sheet name is slightly different depending on pandas' - # version. + read_excel_kwargs = {'io':filename,'index_col':index_col} + + # Parameter specifying sheet name depends on pandas version if packaging.version.parse(pd.__version__) \ < packaging.version.parse('0.21'): - table = pd.read_excel(filename, - sheetname=sheetname, - index_col=index_col) + read_excel_kwargs['sheetname'] = sheetname else: - table = pd.read_excel(filename, - sheet_name=sheetname, - index_col=index_col) + read_excel_kwargs['sheet_name'] = sheetname + + if engine is None: + # try reading Excel file using openpyxl engine first, then xlrd + try: + read_excel_kwargs['engine'] = 'openpyxl' + table = pd.read_excel(**read_excel_kwargs) + except ImportError as e: + if not('openpyxl' in str(e).lower() + and 'missing' in str(e).lower()): + raise + else: + # pandas recognizes openpyxl but package is missing, try xlrd + read_excel_kwargs['engine'] = 'xlrd' + table = pd.read_excel(**read_excel_kwargs) + except ValueError as e: + if not('openpyxl' in str(e).lower() + and 'unknown' in str(e).lower()): + raise + else: + # pandas does not recognize openpyxl (e.g. pandas + # version <= 0.25.0), try xlrd + read_excel_kwargs['engine'] = 'xlrd' + table = pd.read_excel(**read_excel_kwargs) + except Exception as e: + if 'openpyxl' in sys.modules \ + and isinstance(e, openpyxl.utils.exceptions \ + .InvalidFileException): + # unsupported file type (e.g. .xls), try xlrd + # + # (note: openpyxl's InvalidFileException has been stable at + # that location since v2.2.0) + read_excel_kwargs['engine'] = 'xlrd' + table = pd.read_excel(**read_excel_kwargs) + else: + raise + else: + read_excel_kwargs['engine'] = engine + table = pd.read_excel(**read_excel_kwargs) + # Eliminate rows whose index are null if index_col is not None: table = table[pd.notnull(table.index)] diff --git a/doc/getting_started/install_python.rst b/doc/getting_started/install_python.rst index dcc7d2a..6537f2a 100644 --- a/doc/getting_started/install_python.rst +++ b/doc/getting_started/install_python.rst @@ -19,6 +19,7 @@ Alternatively, download ``FlowCal`` from `here =0.16.0) * ``pandas`` (>=0.16.1) * ``xlrd`` (>=0.9.2) +* ``openpyxl`` (>=2.4.1) * ``XlsxWriter`` (>=0.5.2) If you have ``pip``, a ``requirements.txt`` file is provided, such that the required packages can be installed by running:: @@ -52,4 +53,4 @@ Again, some users may need to precede the previous commands with ``sudo``. sudo pip install --upgrade pip - After this, you may install ``FlowCal`` by following the steps above. \ No newline at end of file + After this, you may install ``FlowCal`` by following the steps above. diff --git a/requirements.txt b/requirements.txt index f23e2e7..b259aee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ scikit-image>=0.10.0 scikit-learn>=0.16.0 pandas>=0.16.1 xlrd>=0.9.2 +openpyxl>=2.4.1 XlsxWriter>=0.5.2 diff --git a/setup.py b/setup.py index 6d84cd1..69240f7 100644 --- a/setup.py +++ b/setup.py @@ -95,6 +95,7 @@ def find_version(file_path): 'scikit-learn>=0.16.0', 'pandas>=0.16.1', 'xlrd>=0.9.2', + 'openpyxl>=2.4.1', 'XlsxWriter>=0.5.2'], # List additional groups of dependencies here (e.g. development diff --git a/test/test_excel_ui.py b/test/test_excel_ui.py index aba240f..a242eac 100644 --- a/test/test_excel_ui.py +++ b/test/test_excel_ui.py @@ -67,6 +67,53 @@ def test_read_table(self): # Compare tm.assert_frame_equal(table, expected_output) + def test_read_table_xls(self): + """ + Test for proper loading of a table from an old-format Excel sheet. + + """ + xls_filename = 'test/test_excel_ui.xls' + + # Sheet to read + sheetname = "Instruments" + # Column to use as index labels + index_col = "ID" + + # Expected output + expected_output_list = [] + row = {} + row[u'Description'] = u'Moake\'s Flow Cytometer' + row[u'Forward Scatter Channel'] = u'FSC-H' + row[u'Side Scatter Channel'] = u'SSC-H' + row[u'Fluorescence Channels'] = u'FL1-H, FL2-H, FL3-H' + row[u'Time Channel'] = u'Time' + expected_output_list.append(row) + row = {} + row[u'Description'] = u'Moake\'s Flow Cytometer (new acquisition card)' + row[u'Forward Scatter Channel'] = u'FSC' + row[u'Side Scatter Channel'] = u'SSC' + row[u'Fluorescence Channels'] = u'FL1, FL2, FL3' + row[u'Time Channel'] = u'TIME' + expected_output_list.append(row) + expected_index = pd.Series([u'FC001', u'FC002'], name='ID') + expected_columns = [u'Description', + u'Forward Scatter Channel', + u'Side Scatter Channel', + u'Fluorescence Channels', + u'Time Channel'] + + expected_output = pd.DataFrame(expected_output_list, + index=expected_index, + columns=expected_columns) + + # Read table + table = FlowCal.excel_ui.read_table(xls_filename, + sheetname=sheetname, + index_col=index_col) + + # Compare + tm.assert_frame_equal(table, expected_output) + def test_read_table_no_index_col(self): """ Test proper loading of a table when no index column is specified. diff --git a/test/test_excel_ui.xls b/test/test_excel_ui.xls new file mode 100644 index 0000000..8169fc3 Binary files /dev/null and b/test/test_excel_ui.xls differ