Merge branch 'develop' into excel-write-engine

- Removed exception handling that was no longer necessary.
- Corrected a comment noting which pandas versions do not recognize openpyxl.
taborlab · Jun 15, 2020 · 146b0fd · 146b0fd
2 parents 95b3ac1 + 63d8eeb
commit 146b0fd
Show file tree

Hide file tree

Showing 10 changed files with 269 additions and 215 deletions.
diff --git a/FlowCal/excel_ui.py b/FlowCal/excel_ui.py
diff --git a/FlowCal/mef.py b/FlowCal/mef.py
@@ -24,14 +24,7 @@
 import FlowCal.transform
 import FlowCal.stats
 
-# Use default colors from palettable if available
-try:
-    import palettable
-except ImportError as e:
-    standard_curve_colors = ['b', 'g', 'r']
-else:
-    standard_curve_colors = \
-        palettable.colorbrewer.qualitative.Paired_12.mpl_colors[1::2]
+standard_curve_colors = ['tab:blue', 'tab:green', 'tab:red']
 
 def clustering_gmm(data,
                    n_clusters,
@@ -581,7 +574,8 @@ def get_transform_fxn(data_beads,
         Known MEF values for the calibration bead subpopulations, for each
         channel specified in `mef_channels`. The innermost sequences must
         have the same length (the same number of bead subpopulations must
-        exist for each channel).
+        exist for each channel). Values of np.nan or None specify that a
+        subpopulation should be omitted from the fitting procedure.
     mef_channels : int, or str, or list of int, or list of str
         Channels for which to generate transformation functions.
     verbose : bool, optional
@@ -917,18 +911,9 @@ def get_transform_fxn(data_beads,
     else:
         mef_channels = [mef_channels]
         mef_values   = [mef_values]
+
     # Transform mef_values to numpy array
-    mef_values = np.array(mef_values)
-
-    # Ensure matching number of `mef_values` for all channels (this implies
-    # that the calibration beads have the same number of subpopulations for
-    # all channels).
-    if not np.all([len(mef_values_channel)==len(mef_values[0])
-                   for mef_values_channel in mef_values]):
-        msg  = "innermost sequences of mef_values must have the same length"
-        msg += " (same number of bead subpopulations must exist for each"
-        msg += " channel)"
-        raise ValueError(msg)
+    mef_values = np.array(mef_values, dtype=np.float)
 
     ###
     # 1. Clustering

diff --git a/FlowCal/plot.py b/FlowCal/plot.py
@@ -58,13 +58,7 @@
 from matplotlib.font_manager import FontProperties
 import warnings
 
-# Use default colors from palettable if available
-try:
-    import palettable
-except ImportError as e:
-    cmap_default = plt.get_cmap(matplotlib.rcParams['image.cmap'])
-else:
-    cmap_default = palettable.colorbrewer.diverging.Spectral_8_r.mpl_colormap
+cmap_default = plt.get_cmap('Spectral_r')
 
 savefig_dpi = 250
 
@@ -941,67 +935,59 @@ def hist1d(data_list,
 
     # Iterate through data_list
     for i, data in enumerate(data_list):
+        hist_kwargs = kwargs.copy()  # note: this is a shallow copy
+
         # Extract channel
+        if 'x' in hist_kwargs:
+            raise ValueError("`x` must be specified via `data_list`")
         if data.ndim > 1:
-            y = data[:, channel]
+            hist_kwargs['x'] = data[:, channel]
         else:
-            y = data
+            hist_kwargs['x'] = data
 
         # If ``data_plot.hist_bins()`` exists, obtain bin edges from it if
         # necessary. If it does not exist, do not modify ``bins``.
-        if hasattr(y, 'hist_bins') and hasattr(y.hist_bins, '__call__'):
+        hist_kwargs['bins'] = bins
+        if hasattr(hist_kwargs['x'], 'hist_bins') \
+                and hasattr(hist_kwargs['x'].hist_bins, '__call__'):
             # If bins is None or an integer, get bin edges from
             # ``data_plot.hist_bins()``.
-            if bins is None or isinstance(bins, int):
-                bins = y.hist_bins(channels=0,
-                                   nbins=bins,
-                                   scale=xscale,
-                                   **xscale_kwargs)
-
-        # Decide whether to normalize
-        if normed_height and not normed_area:
-            weights = np.ones_like(y)/float(len(y))
-        else:
-            weights = None
-
-        # Actually plot
+            if hist_kwargs['bins'] is None \
+                    or isinstance(hist_kwargs['bins'], int):
+                hist_kwargs['bins'] = hist_kwargs['x'].hist_bins(
+                    channels=0,
+                    nbins=hist_kwargs['bins'],
+                    scale=xscale,
+                    **xscale_kwargs)
+
+        # Resolve normalizations
+        if 'density' in hist_kwargs:
+            msg  = "use `normed_area` instead of `density`"
+            raise ValueError(msg)
+        if 'normed' in hist_kwargs:
+            msg  = "use `normed_area` or `normed_height` instead of `normed`"
+            raise ValueError(msg)
         if packaging.version.parse(matplotlib.__version__) \
                 >= packaging.version.parse('2.2'):
-            if bins is not None:
-                n, edges, patches = plt.hist(y,
-                                             bins,
-                                             weights=weights,
-                                             density=normed_area,
-                                             histtype=histtype,
-                                             edgecolor=edgecolor[i],
-                                             facecolor=facecolor[i],
-                                             **kwargs)
-            else:
-                n, edges, patches = plt.hist(y,
-                                             weights=weights,
-                                             density=normed_area,
-                                             histtype=histtype,
-                                             edgecolor=edgecolor[i],
-                                             facecolor=facecolor[i],
-                                             **kwargs)
+            hist_kwargs['density'] = normed_area
         else:
-            if bins is not None:
-                n, edges, patches = plt.hist(y,
-                                             bins,
-                                             weights=weights,
-                                             normed=normed_area,
-                                             histtype=histtype,
-                                             edgecolor=edgecolor[i],
-                                             facecolor=facecolor[i],
-                                             **kwargs)
-            else:
-                n, edges, patches = plt.hist(y,
-                                             weights=weights,
-                                             normed=normed_area,
-                                             histtype=histtype,
-                                             edgecolor=edgecolor[i],
-                                             facecolor=facecolor[i],
-                                             **kwargs)
+            hist_kwargs['normed'] = normed_area
+
+        # Calculate weights if normalizing bins by height
+        if normed_height and not normed_area:
+            if 'weights' in hist_kwargs:
+                msg  = "`weights` must not be specified if"
+                msg += " `normed_height=True`"
+                raise ValueError(msg)
+            hist_kwargs['weights'] = np.ones_like(hist_kwargs['x'])
+            hist_kwargs['weights'] /= float(len(hist_kwargs['x']))
+
+        hist_kwargs['histtype']  = histtype
+        hist_kwargs['facecolor'] = facecolor[i]
+        hist_kwargs['edgecolor'] = edgecolor[i]
+
+        # Plot
+        n, edges, patches = plt.hist(**hist_kwargs)
 
     # Set scale of x axis
     if xscale=='logicle':
@@ -1017,9 +1003,9 @@ def hist1d(data_list,
     if xlabel is not None:
         # Highest priority is user-provided label
         plt.xlabel(xlabel)
-    elif hasattr(y, 'channels'):
+    elif hasattr(hist_kwargs['x'], 'channels'):
         # Attempt to use channel name
-        plt.xlabel(y.channels[0])
+        plt.xlabel(hist_kwargs['x'].channels[0])
 
     if ylabel is not None:
         # Highest priority is user-provided label
@@ -1786,7 +1772,8 @@ def density_and_hist(data,
 
     # Colors
     n_colors = n_plots - 1
-    colors = [cmap_default(i) for i in np.linspace(0, 1, n_colors)]
+    default_property_cycler = plt.rcParams['axes.prop_cycle']()
+    colors = [next(default_property_cycler)['color'] for i in range(n_colors)]
     # Histogram
     for i, hist_channel in enumerate(hist_channels):
         # Define subplot

diff --git a/doc/getting_started/install_python.rst b/doc/getting_started/install_python.rst
@@ -11,10 +11,9 @@ Alternatively, download ``FlowCal`` from `here <https://github.com/taborlab/Flow
 
 * ``packaging`` (>=16.8)
 * ``six`` (>=1.10.0)
-* ``numpy`` (>=1.8.2)
-* ``scipy`` (>=0.14.0)
+* ``numpy`` (>=1.9.0)
+* ``scipy`` (>=0.19.0)
 * ``matplotlib`` (>=2.0.0)
-* ``palettable`` (>=2.1.1)
 * ``scikit-image`` (>=0.10.0)
 * ``scikit-learn`` (>=0.16.0)
 * ``pandas`` (>=0.16.1)

diff --git a/doc/python_tutorial/excel_ui.rst b/doc/python_tutorial/excel_ui.rst
@@ -51,7 +51,7 @@ From there, one can obtain the file name and analysis options of each beads file
 ...     verbose=True,
 ...     plot=True)
 
-``FlowCal.excel_ui.process_beads_table`` uses the instruments table and the beads table to automatically open, density-gate, and transform the specified beads files, and generate MEF transformation functions as indicated by the Excel input file. The flags ``verbose`` and ``plot`` instruct the function to generate messages for each file being processed, and plots for each step of standard curve calculation, similar to what we saw in the :doc:`MEF tutorial </python_tutorial/mef>`. The ouput arguments are ``beads_samples``, a list of transformed and gated FCSData objects, and ``mef_transform_fxns``, a dictionary of MEF transformation functions, indexed by the ID of the beads files.
+``FlowCal.excel_ui.process_beads_table`` uses the instruments table and the beads table to automatically open, density-gate, and transform the specified beads files, and generate MEF transformation functions as indicated by the Excel input file. The flags ``verbose`` and ``plot`` instruct the function to generate messages for each file being processed, and plots for each step of standard curve calculation, similar to what we saw in the :doc:`MEF tutorial </python_tutorial/mef>`. The output arguments are ``beads_samples``, a dictionary of transformed and gated FCSData objects, and ``mef_transform_fxns``, a dictionary of MEF transformation functions, each indexed by the ID of the beads files.
 
 In a similar way, ``FlowCal``'s Excel UI can automatically density-gate and transform cell samples using a single instruction:
 
@@ -62,6 +62,6 @@ In a similar way, ``FlowCal``'s Excel UI can automatically density-gate and tran
 ...     verbose=True,
 ...     plot=True)
 
-``FlowCal.excel_ui.process_samples_table`` uses the instruments and samples tables to open, density-gate, and transform cell samples as specified, and return the processed data as a list of FCSData objects. If the input Excel file specifies that some samples should be transformed to MEF, ``FlowCal.excel_ui.process_samples_table`` also requires a dictionary with the respective MEF transformation functions (``mef_transform_fxns``), which was provided in the previous step by ``FlowCal.excel_ui.process_beads_table``.
+``FlowCal.excel_ui.process_samples_table`` uses the instruments and samples tables to open, density-gate, and transform cell samples as specified, and return the processed data as a dictionary of FCSData objects. If the input Excel file specifies that some samples should be transformed to MEF, ``FlowCal.excel_ui.process_samples_table`` also requires a dictionary with the respective MEF transformation functions (``mef_transform_fxns``), which was provided in the previous step by ``FlowCal.excel_ui.process_beads_table``.
 
 **This is all the code required to obtain a set of processed cell samples**. From here, one can perform any desired analysis on ``samples``. Note that ``samples_table`` contains any other information in the input Excel file not directly used by ``FlowCal``, such as inducer concentration, incubation time, etc. This can be used to build an induction curve, fluorescence vs. final optical density (OD), etc.
diff --git a/examples/analyze_excel_ui.py b/examples/analyze_excel_ui.py
@@ -50,10 +50,11 @@
     # To do so, it requires a table describing the flow cytometer used
     # (``instruments_table``). Here, we also use verbose mode, and indicate that
     # plots describing individual steps should be generated in the folder
-    # "plot_beads". The result is a list of ``FCSData`` objects representing
-    # gated and transformed calibration beads samples (``beads_samples``), and
-    # a dictionary containing MEF transformation functions
-    # (``mef_transform_fxns``). This will be used later to process cell samples.
+    # "plot_beads". The result is a dictionary of ``FCSData`` objects
+    # representing gated and transformed calibration beads samples
+    # (``beads_samples``), and a dictionary containing MEF transformation
+    # functions (``mef_transform_fxns``). This will be used later to process
+    # cell samples.
     beads_samples, mef_transform_fxns = FlowCal.excel_ui.process_beads_table(
         beads_table=beads_table,
         instruments_table=instruments_table,
@@ -98,7 +99,7 @@
     # in the context of accessory matplotlib functions to modify the axes
     # limits and labels and add a legend, among others.
     plt.figure(figsize=(6,3.5))
-    FlowCal.plot.hist1d(samples,
+    FlowCal.plot.hist1d(list(samples.values()),
                         channel='FL1',
                         histtype='step',
                         bins=128)
@@ -118,7 +119,7 @@
     # geometric mean from channel FL1 of each sample, and plot them against the
     # corresponding IPTG concentrations.
     samples_fluorescence = [FlowCal.stats.gmean(s, channels='FL1')
-                            for s in samples]
+                            for s in list(samples.values())]
     plt.figure(figsize=(5.5, 3.5))
     plt.plot(iptg,
              samples_fluorescence,
@@ -130,4 +131,4 @@
     plt.savefig('dose_response.png', dpi=200)
     plt.close()
 
-    print("\nDone.")
+    print("\nDone.")
diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,8 @@
 packaging>=16.8
 six>=1.10.0
-numpy>=1.8.2
-scipy>=0.14.0
+numpy>=1.9.0
+scipy>=0.19.0
 matplotlib>=2.0.0
-palettable>=2.1.1
 scikit-image>=0.10.0
 scikit-learn>=0.16.0
 pandas>=0.16.1

diff --git a/setup.py b/setup.py
@@ -87,10 +87,9 @@ def find_version(file_path):
     # https://packaging.python.org/en/latest/requirements.html
     install_requires=['packaging>=16.8',
                       'six>=1.10.0',
-                      'numpy>=1.8.2',
-                      'scipy>=0.14.0',
+                      'numpy>=1.9.0',
+                      'scipy>=0.19.0',
                       'matplotlib>=2.0.0',
-                      'palettable>=2.1.1',
                       'scikit-image>=0.10.0',
                       'scikit-learn>=0.16.0',
                       'pandas>=0.16.1',

diff --git a/test/test_excel_ui.py b/test/test_excel_ui.py
@@ -67,6 +67,53 @@ def test_read_table(self):
         # Compare
         tm.assert_frame_equal(table, expected_output)
 
+    def test_read_table_xls(self):
+        """
+        Test for proper loading of a table from an old-format Excel sheet.
+
+        """
+        xls_filename = 'test/test_excel_ui.xls'
+
+        # Sheet to read
+        sheetname = "Instruments"
+        # Column to use as index labels
+        index_col = "ID"
+
+        # Expected output
+        expected_output_list = []
+        row = {}
+        row[u'Description'] = u'Moake\'s Flow Cytometer'
+        row[u'Forward Scatter Channel'] = u'FSC-H'
+        row[u'Side Scatter Channel'] = u'SSC-H'
+        row[u'Fluorescence Channels'] = u'FL1-H, FL2-H, FL3-H'
+        row[u'Time Channel'] = u'Time'
+        expected_output_list.append(row)
+        row = {}
+        row[u'Description'] = u'Moake\'s Flow Cytometer (new acquisition card)'
+        row[u'Forward Scatter Channel'] = u'FSC'
+        row[u'Side Scatter Channel'] = u'SSC'
+        row[u'Fluorescence Channels'] = u'FL1, FL2, FL3'
+        row[u'Time Channel'] = u'TIME'
+        expected_output_list.append(row)
+        expected_index = pd.Series([u'FC001', u'FC002'], name='ID')
+        expected_columns = [u'Description',
+                            u'Forward Scatter Channel',
+                            u'Side Scatter Channel',
+                            u'Fluorescence Channels',
+                            u'Time Channel']
+
+        expected_output = pd.DataFrame(expected_output_list,
+                                       index=expected_index,
+                                       columns=expected_columns)
+
+        # Read table
+        table = FlowCal.excel_ui.read_table(xls_filename,
+                                            sheetname=sheetname,
+                                            index_col=index_col)
+
+        # Compare
+        tm.assert_frame_equal(table, expected_output)
+
     def test_read_table_no_index_col(self):
         """
         Test proper loading of a table when no index column is specified.

diff --git a/test/test_excel_ui.xls b/test/test_excel_ui.xls