Skip to content

Commit

Permalink
Merge branch 'develop' into excel-write-engine
Browse files Browse the repository at this point in the history
-Some exception handling that was no longer necessary was removed.
-A comment noting pandas versions that would not recognize
 openpyxl was corrected.
  • Loading branch information
JS3xton committed Jun 15, 2020
2 parents 95b3ac1 + 63d8eeb commit 146b0fd
Show file tree
Hide file tree
Showing 10 changed files with 269 additions and 215 deletions.
271 changes: 154 additions & 117 deletions FlowCal/excel_ui.py

Large diffs are not rendered by default.

25 changes: 5 additions & 20 deletions FlowCal/mef.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,7 @@
import FlowCal.transform
import FlowCal.stats

# Use default colors from palettable if available
try:
import palettable
except ImportError as e:
standard_curve_colors = ['b', 'g', 'r']
else:
standard_curve_colors = \
palettable.colorbrewer.qualitative.Paired_12.mpl_colors[1::2]
standard_curve_colors = ['tab:blue', 'tab:green', 'tab:red']

def clustering_gmm(data,
n_clusters,
Expand Down Expand Up @@ -581,7 +574,8 @@ def get_transform_fxn(data_beads,
Known MEF values for the calibration bead subpopulations, for each
channel specified in `mef_channels`. The innermost sequences must
have the same length (the same number of bead subpopulations must
exist for each channel).
exist for each channel). Values of np.nan or None specify that a
subpopulation should be omitted from the fitting procedure.
mef_channels : int, or str, or list of int, or list of str
Channels for which to generate transformation functions.
verbose : bool, optional
Expand Down Expand Up @@ -917,18 +911,9 @@ def get_transform_fxn(data_beads,
else:
mef_channels = [mef_channels]
mef_values = [mef_values]

# Transform mef_values to numpy array
mef_values = np.array(mef_values)

# Ensure matching number of `mef_values` for all channels (this implies
# that the calibration beads have the same number of subpopulations for
# all channels).
if not np.all([len(mef_values_channel)==len(mef_values[0])
for mef_values_channel in mef_values]):
msg = "innermost sequences of mef_values must have the same length"
msg += " (same number of bead subpopulations must exist for each"
msg += " channel)"
raise ValueError(msg)
mef_values = np.array(mef_values, dtype=np.float)

###
# 1. Clustering
Expand Down
107 changes: 47 additions & 60 deletions FlowCal/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,7 @@
from matplotlib.font_manager import FontProperties
import warnings

# Use default colors from palettable if available
try:
import palettable
except ImportError as e:
cmap_default = plt.get_cmap(matplotlib.rcParams['image.cmap'])
else:
cmap_default = palettable.colorbrewer.diverging.Spectral_8_r.mpl_colormap
cmap_default = plt.get_cmap('Spectral_r')

savefig_dpi = 250

Expand Down Expand Up @@ -941,67 +935,59 @@ def hist1d(data_list,

# Iterate through data_list
for i, data in enumerate(data_list):
hist_kwargs = kwargs.copy() # note: this is a shallow copy

# Extract channel
if 'x' in hist_kwargs:
raise ValueError("`x` must be specified via `data_list`")
if data.ndim > 1:
y = data[:, channel]
hist_kwargs['x'] = data[:, channel]
else:
y = data
hist_kwargs['x'] = data

# If ``data_plot.hist_bins()`` exists, obtain bin edges from it if
# necessary. If it does not exist, do not modify ``bins``.
if hasattr(y, 'hist_bins') and hasattr(y.hist_bins, '__call__'):
hist_kwargs['bins'] = bins
if hasattr(hist_kwargs['x'], 'hist_bins') \
and hasattr(hist_kwargs['x'].hist_bins, '__call__'):
# If bins is None or an integer, get bin edges from
# ``data_plot.hist_bins()``.
if bins is None or isinstance(bins, int):
bins = y.hist_bins(channels=0,
nbins=bins,
scale=xscale,
**xscale_kwargs)

# Decide whether to normalize
if normed_height and not normed_area:
weights = np.ones_like(y)/float(len(y))
else:
weights = None

# Actually plot
if hist_kwargs['bins'] is None \
or isinstance(hist_kwargs['bins'], int):
hist_kwargs['bins'] = hist_kwargs['x'].hist_bins(
channels=0,
nbins=hist_kwargs['bins'],
scale=xscale,
**xscale_kwargs)

# Resolve normalizations
if 'density' in hist_kwargs:
msg = "use `normed_area` instead of `density`"
raise ValueError(msg)
if 'normed' in hist_kwargs:
msg = "use `normed_area` or `normed_height` instead of `normed`"
raise ValueError(msg)
if packaging.version.parse(matplotlib.__version__) \
>= packaging.version.parse('2.2'):
if bins is not None:
n, edges, patches = plt.hist(y,
bins,
weights=weights,
density=normed_area,
histtype=histtype,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
else:
n, edges, patches = plt.hist(y,
weights=weights,
density=normed_area,
histtype=histtype,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
hist_kwargs['density'] = normed_area
else:
if bins is not None:
n, edges, patches = plt.hist(y,
bins,
weights=weights,
normed=normed_area,
histtype=histtype,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
else:
n, edges, patches = plt.hist(y,
weights=weights,
normed=normed_area,
histtype=histtype,
edgecolor=edgecolor[i],
facecolor=facecolor[i],
**kwargs)
hist_kwargs['normed'] = normed_area

# Calculate weights if normalizing bins by height
if normed_height and not normed_area:
if 'weights' in hist_kwargs:
msg = "`weights` must not be specified if"
msg += " `normed_height=True`"
raise ValueError(msg)
hist_kwargs['weights'] = np.ones_like(hist_kwargs['x'])
hist_kwargs['weights'] /= float(len(hist_kwargs['x']))

hist_kwargs['histtype'] = histtype
hist_kwargs['facecolor'] = facecolor[i]
hist_kwargs['edgecolor'] = edgecolor[i]

# Plot
n, edges, patches = plt.hist(**hist_kwargs)

# Set scale of x axis
if xscale=='logicle':
Expand All @@ -1017,9 +1003,9 @@ def hist1d(data_list,
if xlabel is not None:
# Highest priority is user-provided label
plt.xlabel(xlabel)
elif hasattr(y, 'channels'):
elif hasattr(hist_kwargs['x'], 'channels'):
# Attempt to use channel name
plt.xlabel(y.channels[0])
plt.xlabel(hist_kwargs['x'].channels[0])

if ylabel is not None:
# Highest priority is user-provided label
Expand Down Expand Up @@ -1786,7 +1772,8 @@ def density_and_hist(data,

# Colors
n_colors = n_plots - 1
colors = [cmap_default(i) for i in np.linspace(0, 1, n_colors)]
default_property_cycler = plt.rcParams['axes.prop_cycle']()
colors = [next(default_property_cycler)['color'] for i in range(n_colors)]
# Histogram
for i, hist_channel in enumerate(hist_channels):
# Define subplot
Expand Down
5 changes: 2 additions & 3 deletions doc/getting_started/install_python.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ Alternatively, download ``FlowCal`` from `here <https://github.com/taborlab/Flow

* ``packaging`` (>=16.8)
* ``six`` (>=1.10.0)
* ``numpy`` (>=1.8.2)
* ``scipy`` (>=0.14.0)
* ``numpy`` (>=1.9.0)
* ``scipy`` (>=0.19.0)
* ``matplotlib`` (>=2.0.0)
* ``palettable`` (>=2.1.1)
* ``scikit-image`` (>=0.10.0)
* ``scikit-learn`` (>=0.16.0)
* ``pandas`` (>=0.16.1)
Expand Down
4 changes: 2 additions & 2 deletions doc/python_tutorial/excel_ui.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ From there, one can obtain the file name and analysis options of each beads file
... verbose=True,
... plot=True)

``FlowCal.excel_ui.process_beads_table`` uses the instruments table and the beads table to automatically open, density-gate, and transform the specified beads files, and generate MEF transformation functions as indicated by the Excel input file. The flags ``verbose`` and ``plot`` instruct the function to generate messages for each file being processed, and plots for each step of standard curve calculation, similar to what we saw in the :doc:`MEF tutorial </python_tutorial/mef>`. The ouput arguments are ``beads_samples``, a list of transformed and gated FCSData objects, and ``mef_transform_fxns``, a dictionary of MEF transformation functions, indexed by the ID of the beads files.
``FlowCal.excel_ui.process_beads_table`` uses the instruments table and the beads table to automatically open, density-gate, and transform the specified beads files, and generate MEF transformation functions as indicated by the Excel input file. The flags ``verbose`` and ``plot`` instruct the function to generate messages for each file being processed, and plots for each step of standard curve calculation, similar to what we saw in the :doc:`MEF tutorial </python_tutorial/mef>`. The output arguments are ``beads_samples``, a dictionary of transformed and gated FCSData objects, and ``mef_transform_fxns``, a dictionary of MEF transformation functions, each indexed by the ID of the beads files.

In a similar way, ``FlowCal``'s Excel UI can automatically density-gate and transform cell samples using a single instruction:

Expand All @@ -62,6 +62,6 @@ In a similar way, ``FlowCal``'s Excel UI can automatically density-gate and tran
... verbose=True,
... plot=True)

``FlowCal.excel_ui.process_samples_table`` uses the instruments and samples tables to open, density-gate, and transform cell samples as specified, and return the processed data as a list of FCSData objects. If the input Excel file specifies that some samples should be transformed to MEF, ``FlowCal.excel_ui.process_samples_table`` also requires a dictionary with the respective MEF transformation functions (``mef_transform_fxns``), which was provided in the previous step by ``FlowCal.excel_ui.process_beads_table``.
``FlowCal.excel_ui.process_samples_table`` uses the instruments and samples tables to open, density-gate, and transform cell samples as specified, and return the processed data as a dictionary of FCSData objects. If the input Excel file specifies that some samples should be transformed to MEF, ``FlowCal.excel_ui.process_samples_table`` also requires a dictionary with the respective MEF transformation functions (``mef_transform_fxns``), which was provided in the previous step by ``FlowCal.excel_ui.process_beads_table``.

**This is all the code required to obtain a set of processed cell samples**. From here, one can perform any desired analysis on ``samples``. Note that ``samples_table`` contains any other information in the input Excel file not directly used by ``FlowCal``, such as inducer concentration, incubation time, etc. This can be used to build an induction curve, fluorescence vs. final optical density (OD), etc.
15 changes: 8 additions & 7 deletions examples/analyze_excel_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@
# To do so, it requires a table describing the flow cytometer used
# (``instruments_table``). Here, we also use verbose mode, and indicate that
# plots describing individual steps should be generated in the folder
# "plot_beads". The result is a list of ``FCSData`` objects representing
# gated and transformed calibration beads samples (``beads_samples``), and
# a dictionary containing MEF transformation functions
# (``mef_transform_fxns``). This will be used later to process cell samples.
# "plot_beads". The result is a dictionary of ``FCSData`` objects
# representing gated and transformed calibration beads samples
# (``beads_samples``), and a dictionary containing MEF transformation
# functions (``mef_transform_fxns``). This will be used later to process
# cell samples.
beads_samples, mef_transform_fxns = FlowCal.excel_ui.process_beads_table(
beads_table=beads_table,
instruments_table=instruments_table,
Expand Down Expand Up @@ -98,7 +99,7 @@
# in the context of accessory matplotlib functions to modify the axes
# limits and labels and add a legend, among others.
plt.figure(figsize=(6,3.5))
FlowCal.plot.hist1d(samples,
FlowCal.plot.hist1d(list(samples.values()),
channel='FL1',
histtype='step',
bins=128)
Expand All @@ -118,7 +119,7 @@
# geometric mean from channel FL1 of each sample, and plot them against the
# corresponding IPTG concentrations.
samples_fluorescence = [FlowCal.stats.gmean(s, channels='FL1')
for s in samples]
for s in list(samples.values())]
plt.figure(figsize=(5.5, 3.5))
plt.plot(iptg,
samples_fluorescence,
Expand All @@ -130,4 +131,4 @@
plt.savefig('dose_response.png', dpi=200)
plt.close()

print("\nDone.")
print("\nDone.")
5 changes: 2 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
packaging>=16.8
six>=1.10.0
numpy>=1.8.2
scipy>=0.14.0
numpy>=1.9.0
scipy>=0.19.0
matplotlib>=2.0.0
palettable>=2.1.1
scikit-image>=0.10.0
scikit-learn>=0.16.0
pandas>=0.16.1
Expand Down
5 changes: 2 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,9 @@ def find_version(file_path):
# https://packaging.python.org/en/latest/requirements.html
install_requires=['packaging>=16.8',
'six>=1.10.0',
'numpy>=1.8.2',
'scipy>=0.14.0',
'numpy>=1.9.0',
'scipy>=0.19.0',
'matplotlib>=2.0.0',
'palettable>=2.1.1',
'scikit-image>=0.10.0',
'scikit-learn>=0.16.0',
'pandas>=0.16.1',
Expand Down
47 changes: 47 additions & 0 deletions test/test_excel_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,53 @@ def test_read_table(self):
# Compare
tm.assert_frame_equal(table, expected_output)

def test_read_table_xls(self):
    """
    Check that a table loads correctly from an old-format (.xls) workbook.

    The "Instruments" sheet of ``test/test_excel_ui.xls`` is read using
    "ID" as the index column, and the result is compared against a
    DataFrame assembled by hand below.
    """
    # Column labels expected in the loaded table, in order
    expected_columns = [u'Description',
                        u'Forward Scatter Channel',
                        u'Side Scatter Channel',
                        u'Fluorescence Channels',
                        u'Time Channel']

    # Cell values of each expected row, ordered as ``expected_columns``
    expected_rows = [
        (u'Moake\'s Flow Cytometer',
         u'FSC-H',
         u'SSC-H',
         u'FL1-H, FL2-H, FL3-H',
         u'Time'),
        (u'Moake\'s Flow Cytometer (new acquisition card)',
         u'FSC',
         u'SSC',
         u'FL1, FL2, FL3',
         u'TIME'),
    ]

    # Build the expected table from one dictionary per row
    expected_output = pd.DataFrame(
        [dict(zip(expected_columns, values)) for values in expected_rows],
        index=pd.Series([u'FC001', u'FC002'], name='ID'),
        columns=expected_columns)

    # Load the sheet under test
    table = FlowCal.excel_ui.read_table('test/test_excel_ui.xls',
                                        sheetname="Instruments",
                                        index_col="ID")

    # Loaded table must match the expected one
    tm.assert_frame_equal(table, expected_output)

def test_read_table_no_index_col(self):
"""
Test proper loading of a table when no index column is specified.
Expand Down
Binary file added test/test_excel_ui.xls
Binary file not shown.

0 comments on commit 146b0fd

Please sign in to comment.