diff --git a/FlowCal/plot.py b/FlowCal/plot.py index f2a4109..4f8e4d8 100644 --- a/FlowCal/plot.py +++ b/FlowCal/plot.py @@ -10,10 +10,10 @@ where `data_list` is a NxD FCSData object or numpy array, or a list of such, `channels` spcecifies the channel or channels to use for the plot, `parameters` are function-specific parameters, and `savefig` indicates - whether to save the figure to an image file. Note that `hist1d` uses - `channel` instead of `channels`, since it uses a single channel, and - `density2d` only accepts one FCSData object or numpy array as its first - argument. + whether to save the figure to an image file. Note that `hist1d` and + `violin` use `channel` instead of `channels`, since they use a single + channel, and `density2d` only accepts one FCSData object or numpy array + as its first argument. Simple Plot Functions do not create a new figure or axis, so they can be called directly to plot in a previously created axis if desired. If @@ -28,6 +28,7 @@ The following functions in this module are Simple Plot Functions: - ``hist1d`` + - ``violin`` - ``density2d`` - ``scatter2d`` - ``scatter3d`` @@ -47,6 +48,7 @@ """ import packaging +import collections import numpy as np import scipy.ndimage.filters import matplotlib @@ -58,61 +60,21 @@ from matplotlib.font_manager import FontProperties import warnings +# expose the collections module abstract base classes (ABCs) in both +# Python 2 and 3 +try: + # python 3 + collectionsAbc = collections.abc +except AttributeError: + # python 2 + collectionsAbc = collections + cmap_default = plt.get_cmap('Spectral_r') savefig_dpi = 250 ### -# HELPER FUNCTIONS FOR SCALE CLASSES -### - -def _base_down(x, base=10): - """ - Floor `x` to the nearest lower ``base^n``, where ``n`` is an integer. - - Parameters - ---------- - x : float - Number to calculate the floor from. - base : float, optional - Base used to calculate the floor. - - Return - ------ - float - The nearest lower ``base^n`` from `x`, where ``n`` is an integer. - - """ - if x == 0.0: - return -base - lx = np.floor(np.log(x) / np.log(base)) - return base ** lx - - -def _base_up(x, base=10): - """ - Ceil `x` to the nearest higher ``base^n``, where ``n`` is an integer. - - Parameters - ---------- - x : float - Number to calculate the ceiling from. - base : float, optional - Base used to calculate the ceiling. - - Return - ------ - float - The nearest higher ``base^n`` from `x`, where ``n`` is an integer. - - """ - if x == 0.0: - return base - lx = np.ceil(np.log(x) / np.log(base)) - return base ** lx - -### -# CUSTOM SCALES +# CUSTOM TRANSFORMS ### class _InterpolatedInverseTransform(matplotlib.transforms.Transform): @@ -425,6 +387,10 @@ def inverted(self): smin=0, smax=self._M) +### +# CUSTOM TICK LOCATORS AND FORMATTERS +### + class _LogicleLocator(matplotlib.ticker.Locator): """ Determine the tick locations for logicle axes. @@ -684,6 +650,281 @@ def view_limits(self, vmin, vmax): result = matplotlib.transforms.nonsingular(vmin, vmax) return result +class _ViolinAutoLocator(matplotlib.ticker.MaxNLocator): + """ + Default linear tick locator aware of min and max violins. + + Parameters + ---------- + min_tick_loc : int or float, optional + Location of min violin tick. Default is None. + max_tick_loc : int or float, optional + Location of max violin tick. Default is None. + data_lim_min : int or float, optional + Location of lower boundary of data, at or below which ticks are not + illustrated. Default is None. + + Other parameters + ---------------- + See matplotlib.ticker.MaxNLocator. + + Notes + ----- + The `nbins` default is 'auto' and the `steps` default is + (1, 2, 2.5, 5, 10) to emulate matplotlib.ticker.AutoLocator, which + subclasses matplotlib.ticker.MaxNLocator and installs nice defaults. + + """ + default_params = matplotlib.ticker.MaxNLocator.default_params.copy() + + # use defaults from matplotlib.ticker.AutoLocator + default_params['nbins'] = 'auto' + default_params['steps'] = (1, 2, 2.5, 5, 10) + + # add parameters specific to violin plots + default_params.update({'min_tick_loc' : None, + 'max_tick_loc' : None, + 'data_lim_min' : None}) + + def set_params(self, **kwargs): + if 'min_tick_loc' in kwargs: + self._min_tick_loc = kwargs.pop('min_tick_loc') + if 'max_tick_loc' in kwargs: + self._max_tick_loc = kwargs.pop('max_tick_loc') + if 'data_lim_min' in kwargs: + self._data_lim_min = kwargs.pop('data_lim_min') + matplotlib.ticker.MaxNLocator.set_params(self, **kwargs) + + def tick_values(self, vmin, vmax): + locs = list(matplotlib.ticker.MaxNLocator.tick_values(self, + vmin=vmin, + vmax=vmax)) + + # if `data_lim_min` is specified, remove all ticks at or below it + if self._data_lim_min is not None: + locs = [loc + for loc in locs + if loc > self._data_lim_min] + + # add min and max violin ticks as appropriate + if self._max_tick_loc is not None: + locs.insert(0, self._max_tick_loc) + if self._min_tick_loc is not None: + locs.insert(0, self._min_tick_loc) + + locs = np.array(locs) + return self.raise_if_exceeds(locs) + +class _ViolinLogLocator(matplotlib.ticker.LogLocator): + """ + Default log tick locator aware of min, max, and zero violins. + + Parameters + ---------- + min_tick_loc : int or float, optional + Location of min violin tick. Default is None. + max_tick_loc : int or float, optional + Location of max violin tick. Default is None. + zero_tick_loc : int or float, optional + Location of zero violin tick. Default is None. + data_lim_min : int or float, optional + Location of lower boundary of data, at or below which ticks are not + illustrated. Default is None. + + Other parameters + ---------------- + See matplotlib.ticker.LogLocator. + + """ + def __init__(self, + min_tick_loc=None, + max_tick_loc=None, + zero_tick_loc=None, + data_lim_min=None, + **kwargs): + self._min_tick_loc = min_tick_loc + self._max_tick_loc = max_tick_loc + self._zero_tick_loc = zero_tick_loc + self._data_lim_min = data_lim_min + matplotlib.ticker.LogLocator.__init__(self, **kwargs) + + def set_params(self, **kwargs): + if 'min_tick_loc' in kwargs: + self._min_tick_loc = kwargs.pop('min_tick_loc') + if 'max_tick_loc' in kwargs: + self._max_tick_loc = kwargs.pop('max_tick_loc') + if 'zero_tick_loc' in kwargs: + self._zero_tick_loc = kwargs.pop('zero_tick_loc') + if 'data_lim_min' in kwargs: + self._data_lim_min = kwargs.pop('data_lim_min') + matplotlib.ticker.LogLocator.set_params(self, **kwargs) + + def tick_values(self, vmin, vmax): + locs = list(matplotlib.ticker.LogLocator.tick_values(self, + vmin=vmin, + vmax=vmax)) + + # if `data_lim_min` is specified, remove all ticks at or below it + if self._data_lim_min is not None: + locs = [loc + for loc in locs + if loc > self._data_lim_min] + + # add min, max, and zero violin ticks as appropriate + if self._zero_tick_loc is not None: + locs.insert(0, self._zero_tick_loc) + if self._max_tick_loc is not None: + locs.insert(0, self._max_tick_loc) + if self._min_tick_loc is not None: + locs.insert(0, self._min_tick_loc) + + locs = np.array(locs) + return self.raise_if_exceeds(locs) + +class _ViolinScalarFormatter(matplotlib.ticker.ScalarFormatter): + """ + Default linear tick formatter aware of min and max violins. + + Parameters + ---------- + min_tick_loc : int or float, optional + Location of min violin tick. Default is None. + max_tick_loc : int or float, optional + Location of max violin tick. Default is None. + min_tick_label : str, optional + Label of min violin tick. Default='Min'. + max_tick_label : str, optional + Label of max violin tick. Default='Max'. + + Other parameters + ---------------- + See matplotlib.ticker.ScalarFormatter. + + """ + def __init__(self, + min_tick_loc=None, + max_tick_loc=None, + min_tick_label='Min', + max_tick_label='Max', + **kwargs): + self._min_tick_loc = min_tick_loc + self._max_tick_loc = max_tick_loc + self._min_tick_label = min_tick_label + self._max_tick_label = max_tick_label + matplotlib.ticker.ScalarFormatter.__init__(self, **kwargs) + + def __call__(self, x, pos=None): + if self._min_tick_loc is not None and x == self._min_tick_loc: + return self._min_tick_label + if self._max_tick_loc is not None and x == self._max_tick_loc: + return self._max_tick_label + return matplotlib.ticker.ScalarFormatter.__call__(self, x=x, pos=pos) + +class _ViolinLogFormatterSciNotation(matplotlib.ticker.LogFormatterSciNotation): + """ + Default log tick formatter aware of min, max, and zero violins. + + Parameters + ---------- + min_tick_loc : int or float, optional + Location of min violin tick. Default is None. + max_tick_loc : int or float, optional + Location of max violin tick. Default is None. + zero_tick_loc : int or float, optional + Location of zero violin tick. Default is None. + min_tick_label : str, optional + Label of min violin tick. Default='Min'. + max_tick_label : str, optional + Label of max violin tick. Default='Max'. + zero_tick_label : str, optional + Label of zero violin tick. Default is generated by + matplotlib.ticker.LogFormatterSciNotation with x=0. + + Other parameters + ---------------- + See matplotlib.ticker.LogFormatterSciNotation. + + """ + def __init__(self, + min_tick_loc=None, + max_tick_loc=None, + zero_tick_loc=None, + min_tick_label='Min', + max_tick_label='Max', + zero_tick_label=None, + **kwargs): + self._min_tick_loc = min_tick_loc + self._max_tick_loc = max_tick_loc + self._zero_tick_loc = zero_tick_loc + self._min_tick_label = min_tick_label + self._max_tick_label = max_tick_label + if zero_tick_label is None: + self._zero_tick_label = \ + matplotlib.ticker.LogFormatterSciNotation.__call__(self, x=0) + else: + self._zero_tick_label = zero_tick_label + matplotlib.ticker.LogFormatterSciNotation.__init__(self, **kwargs) + + def __call__(self, x, pos=None): + if self._min_tick_loc is not None and x == self._min_tick_loc: + return self._min_tick_label + if self._max_tick_loc is not None and x == self._max_tick_loc: + return self._max_tick_label + if self._zero_tick_loc is not None and x == self._zero_tick_loc: + return self._zero_tick_label + return matplotlib.ticker.LogFormatterSciNotation.__call__(self, + x=x, + pos=pos) + +### +# CUSTOM SCALES +### + +def _base_down(x, base=10): + """ + Floor `x` to the nearest lower ``base^n``, where ``n`` is an integer. + + Parameters + ---------- + x : float + Number to calculate the floor from. + base : float, optional + Base used to calculate the floor. + + Return + ------ + float + The nearest lower ``base^n`` from `x`, where ``n`` is an integer. + + """ + if x == 0.0: + return -base + lx = np.floor(np.log(x) / np.log(base)) + return base ** lx + + +def _base_up(x, base=10): + """ + Ceil `x` to the nearest higher ``base^n``, where ``n`` is an integer. + + Parameters + ---------- + x : float + Number to calculate the ceiling from. + base : float, optional + Base used to calculate the ceiling. + + Return + ------ + float + The nearest higher ``base^n`` from `x`, where ``n`` is an integer. + + """ + if x == 0.0: + return base + lx = np.ceil(np.log(x) / np.log(base)) + return base ** lx + class _LogicleScale(matplotlib.scale.ScaleBase): """ Class that implements the logicle axis scaling. @@ -1047,6 +1288,1581 @@ def hist1d(data_list, plt.savefig(savefig, dpi=savefig_dpi) plt.close() +_ViolinRegion = collections.namedtuple(typename='_ViolinRegion', + field_names=('positive_edge', + 'negative_edge', + 'height')) + +def _plot_single_violin(violin_position, + violin_data, + violin_width, + violin_kwargs, + bin_edges, + density, + vert, + scale, + upper_trim_fraction, + lower_trim_fraction, + draw_summary_stat, + draw_summary_stat_fxn, + draw_summary_stat_kwargs): + """ + Plot a single violin. + + Illustrate the relative frequency of members of a population using a + normalized, symmetrical histogram ("violin") centered at a corresponding + position. Wider regions of the violin indicate regions that occur with + greater frequency. + + Parameters + ---------- + violin_position : scalar + Position at which to center violin. + violin_data : 1D array + A population for which to plot a violin. + violin_width : scalar + Width of violin. If `scale` is 'log', the units are decades. + violin_kwargs : dict + Keyword arguments passed to the plt.fill_between() command that + illustrates the violin. + bin_edges : array + Bin edges used to bin population members. + density : bool + `density` parameter passed to the np.histogram() command that bins + population members. If True, violin width represents relative + frequency *density* instead of relative frequency (i.e., bins are + normalized by their width). + vert : bool + Flag specifying to illustrate a vertical violin. If False, a + horizontal violin is illustrated. + scale : {'linear','log'} + Scale of the position axis (x-axis if `vert` is True, y-axis if `vert` + is False). + upper_trim_fraction : float + Fraction of members to trim (discard) from the top of the violin + (e.g., for aesthetic purposes). + lower_trim_fraction : float + Fraction of members to trim (discard) from the bottom of the violin + (e.g., for aesthetic purposes). + draw_summary_stat : bool + Flag specifying to illustrate a summary statistic. + draw_summary_stat_fxn : function + Function used to calculate the summary statistic. The summary + statistic is calculated prior to aesthetic trimming. + draw_summary_stat_kwargs : dict + Keyword arguments passed to the plt.plot() command that illustrates + the summary statistic. + + """ + if draw_summary_stat: + summary_stat = draw_summary_stat_fxn(violin_data) + + # trim outliers to get rid of long, unsightly tails + num_discard_low = int(np.floor(len(violin_data) \ + * float(lower_trim_fraction))) + num_discard_high = int(np.floor(len(violin_data) \ + * float(upper_trim_fraction))) + + violin_data = np.sort(violin_data) + + violin_data = violin_data[num_discard_low:] + violin_data = violin_data[::-1] + violin_data = violin_data[num_discard_high:] + violin_data = violin_data[::-1] + + ### + # build violin + ### + H,H_edges = np.histogram(violin_data, bins=bin_edges, density=density) + H = np.array(H, dtype=np.float) + + # duplicate histogram bin counts to serve as left and right corners of + # bars that trace violin edge + positive_edge = np.repeat(H,2) + + # add zeros to bring violin edge back to the axis + positive_edge = np.insert(positive_edge, 0, 0.0) + positive_edge = np.append(positive_edge, 0.0) + + # normalize violin width + positive_edge /= np.max(positive_edge) + + # rescale to specified violin width + positive_edge *= (violin_width/2.0) + + # reflect violin edge across the axis defined by `violin_position` + if scale == 'log': + negative_edge = np.log10(violin_position) - positive_edge + positive_edge = np.log10(violin_position) + positive_edge + + positive_edge = 10**positive_edge + negative_edge = 10**negative_edge + else: + negative_edge = violin_position - positive_edge + positive_edge = violin_position + positive_edge + + # duplicate the histogram edges to serve as violin height values + height = np.repeat(H_edges,2) + + ### + # crimp violin (i.e. remove the frequency=0 line segments between + # violin regions) + ### + violin_regions = [] + idx = 0 + if len(height) == 1: + # edge case + if positive_edge[idx] == negative_edge[idx]: + # singularity + pass + else: + violin_regions.append(_ViolinRegion(positive_edge = positive_edge, + negative_edge = negative_edge, + height = height)) + else: + # The positive and negative edges can have zero or nonzero width along + # the violin axis. Points where the width is zero represent either the + # end of a violin region, which we want to keep, or part of an inter- + # region line segment (i.e., frequency=0), which we want to discard + # for aesthetic purposes. We can distinguish between these two cases + # by looking at how the equality of the two edges changes as we + # proceed along the violin axis. + start = idx # assume we start in a violin region + while(idx < len(height)-1): + if (positive_edge[idx] == negative_edge[idx]) \ + and (positive_edge[idx+1] != negative_edge[idx+1]): + # violin region is opening + start = idx + elif (positive_edge[idx] != negative_edge[idx]) \ + and (positive_edge[idx+1] != negative_edge[idx+1]): + # violin region is continuing + pass + elif (positive_edge[idx] != negative_edge[idx]) \ + and (positive_edge[idx+1] == negative_edge[idx+1]): + # violin region is closing + end = idx+1 # include this point + violin_regions.append( + _ViolinRegion(positive_edge = positive_edge[start:end+1], + negative_edge = negative_edge[start:end+1], + height = height[start:end+1])) + start = None # we are no longer in a violin region + elif (positive_edge[idx] == negative_edge[idx]) \ + and (positive_edge[idx+1] == negative_edge[idx+1]): + # we are in an inter-region segment + start = None + + idx += 1 + + if start is not None: + # if we were still in a violin region at the end, + # add the last region to the list + end = idx # include this point + violin_regions.append( + _ViolinRegion(positive_edge = positive_edge[start:end+1], + negative_edge = negative_edge[start:end+1], + height = height[start:end+1])) + + # illustrate violin + if vert: + for vr in violin_regions: + plt.fill_betweenx(x1=vr.negative_edge, + x2=vr.positive_edge, + y=vr.height, + **violin_kwargs) + else: + for vr in violin_regions: + plt.fill_between(y1=vr.positive_edge, + y2=vr.negative_edge, + x=vr.height, + **violin_kwargs) + + # illustrate summary statistic + if draw_summary_stat: + if scale == 'log': + positive_edge = np.log10(violin_position) + (violin_width/2.0) + negative_edge = np.log10(violin_position) - (violin_width/2.0) + + positive_edge = 10**positive_edge + negative_edge = 10**negative_edge + else: + positive_edge = violin_position + (violin_width/2.0) + negative_edge = violin_position - (violin_width/2.0) + + if vert: + plt.plot([negative_edge, positive_edge], + [summary_stat, summary_stat], + **draw_summary_stat_kwargs) + else: + plt.plot([summary_stat, summary_stat], + [negative_edge, positive_edge], + **draw_summary_stat_kwargs) + +def violin(data, + channel=None, + positions=None, + violin_width=None, + xscale=None, + yscale=None, + xlim=None, + ylim=None, + vert=True, + num_bins=100, + bin_edges=None, + density=False, + upper_trim_fraction=0.01, + lower_trim_fraction=0.01, + violin_width_to_span_fraction=0.1, + violin_kwargs=None, + draw_summary_stat=True, + draw_summary_stat_fxn=np.mean, + draw_summary_stat_kwargs=None, + log_zero_tick_label=None, + draw_log_zero_divider=True, + draw_log_zero_divider_kwargs=None, + xlabel=None, + ylabel=None, + title=None, + savefig=None): + """ + Plot violin plot. + + Illustrate the relative frequency of members of different populations + using normalized, symmetrical histograms ("violins") centered at + corresponding positions. Wider regions of violins indicate regions that + occur with greater frequency. + + Parameters + ---------- + data : 1D or ND array or list of 1D or ND arrays + A population or collection of populations for which to plot violins. + If ND arrays are used (e.g., FCSData), `channel` must be specified. + channel : int or str, optional + Channel from `data` to plot. If specified, data are assumed to be ND + arrays. String channel specifications are only supported for data + types that support string-based indexing (e.g., FCSData). + positions : scalar or array, optional + Positions at which to center violins. + violin_width : scalar, optional + Width of violin. If the scale of the position axis (`xscale` if `vert` + is True, `yscale` if `vert` is False) is 'log', the units are decades. + If not specified, `violin_width` is calculated from the limits of the + position axis (`xlim` if `vert` is True, `ylim` if `vert` is False) + and `violin_width_to_span_fraction`. If only one violin is specified + in `data`, `violin_width` = 0.5. + savefig : str, optional + The name of the file to save the figure to. If None, do not save. + + Other parameters + ---------------- + xscale : {'linear','log','logicle'}, optional + Scale of the x-axis. 'logicle' is only supported for horizontal violin + plots (i.e., when `vert` is False). Default is 'linear' if `vert` is + True, 'logicle' if `vert` is False. + yscale : {'logicle','linear','log'}, optional + Scale of the y-axis. If `vert` is False, 'logicle' is not supported. + Default is 'logicle' if `vert` is True, 'linear' if `vert` is False. + xlim, ylim : tuple, optional + Limits of the x-axis and y-axis views. If not specified, the view of + the position axis (`xlim` if `vert` is True, `ylim` if `vert` if + False) is calculated to pad the extreme violins with + 0.5*`violin_width`. If `violin_width` is also not specified, + `violin_width` is calculated to satisfy the 0.5*`violin_width` padding + and `violin_width_to_span_fraction`. If not specified, the view of the + data axis (`ylim` if `vert` is True, `xlim` if `vert` is False) is + calculated to span all violins (before they are aesthetically + trimmed). + vert : bool, optional + Flag specifying to illustrate a vertical violin plot. If False, a + horizontal violin plot is illustrated. + num_bins : int, optional + Number of bins to bin population members. Ignored if `bin_edges` is + specified. + bin_edges : array or list of arrays, optional + Bin edges used to bin population members. Bin edges can be specified + for individual violins using a list of arrays of the same length as + `data`. If not specified, `bin_edges` is calculated to span the data + axis (`ylim` if `vert` is True, `xlim` if `vert` is False) logicly, + linearly, or logarithmically (based on the scale of the data axis; + `yscale` if `vert` is True, `xscale` if `vert` is False) using + `num_bins`. + density : bool, optional + `density` parameter passed to the np.histogram() command that bins + population members for each violin. If True, violin width represents + relative frequency *density* instead of relative frequency (i.e., bins + are normalized by their width). + upper_trim_fraction : float or list of floats, optional + Fraction of members to trim (discard) from the top of the violin + (e.g., for aesthetic purposes). Upper trim fractions can be specified + for individual violins using a list of floats of the same length as + `data`. + lower_trim_fraction : float or list of floats, optional + Fraction of members to trim (discard) from the bottom of the violin + (e.g., for aesthetic purposes). Lower trim fractions can be specified + for individual violins using a list of floats of the same length as + `data`. + violin_width_to_span_fraction : float, optional + Fraction of the position axis span (`xlim` if `vert` is True, `ylim` + if `vert` is False) that a violin should span. Ignored if + `violin_width` is specified. + violin_kwargs : dict or list of dicts, optional + Keyword arguments passed to the plt.fill_between() command that + illustrates each violin. Keyword arguments can be specified for + individual violins using a list of dicts of the same length as `data`. + Default = {'facecolor':'gray', 'edgecolor':'black'}. + draw_summary_stat : bool, optional + Flag specifying to illustrate a summary statistic for each violin. + draw_summary_stat_fxn : function, optional + Function used to calculate the summary statistic for each violin. + Summary statistics are calculated prior to aesthetic trimming. + draw_summary_stat_kwargs : dict or list of dicts, optional + Keyword arguments passed to the plt.plot() command that illustrates + each violin's summary statistic. Keyword arguments can be specified + for individual violins using a list of dicts of the same length as + `data`. Default = {'color':'black'}. + log_zero_tick_label : str, optional + Label of position=0 violin tick if the position axis scale (`xscale` + if `vert` is True, `yscale` if `vert` is False) is 'log'. Default is + generated by the default log tick formatter + (matplotlib.ticker.LogFormatterSciNotation) with x=0. + draw_log_zero_divider : bool, optional + Flag specifying to illustrate a line separating the position=0 violin + from the other violins if the position axis scale (`xscale` if `vert` + is True, `yscale` if `vert` is False) is 'log'. + draw_log_zero_divider_kwargs : dict, optional + Keyword arguments passed to the plt.axvline() or plt.axhline() command + that illustrates the position=0 violin divider. Default = + {'color':'gray','linestyle':':'}. + xlabel, ylabel : str, optional + Labels to use on the x and y axes. If a label for the data axis is not + specified (`ylabel` if `vert` is True, `xlabel` if `vert` is False), + the channel name will be used if possible (extracted from the last + data object). + title : str, optional + Plot title. + + """ + + ### + # understand inputs + ### + + # populate default input values + if xscale is None: + xscale = 'linear' if vert else 'logicle' + if yscale is None: + yscale = 'logicle' if vert else 'linear' + + if violin_kwargs is None: + violin_kwargs = {'facecolor':'gray', 'edgecolor':'black'} + + if draw_summary_stat_kwargs is None: + draw_summary_stat_kwargs = {'color':'black'} + if draw_log_zero_divider_kwargs is None: + draw_log_zero_divider_kwargs = {'color':'gray', 'linestyle':':'} + + # check x and y scales + if vert: + if xscale not in ('linear', 'log'): + msg = "when `vert` is True, `xscale` must be 'linear' or 'log'" + raise ValueError(msg) + + if yscale not in ('logicle', 'linear', 'log'): + msg = "when `vert` is True, `yscale` must be 'logicle'," + msg += " 'linear', or 'log'" + raise ValueError(msg) + + data_scale = yscale + position_scale = xscale + else: + if xscale not in ('logicle', 'linear', 'log'): + msg = "when `vert` is False, `xscale` must be 'logicle'," + msg += " 'linear', or 'log'" + raise ValueError(msg) + + if yscale not in ('linear', 'log'): + msg = "when `vert` is False, `yscale` must be 'linear' or 'log'" + raise ValueError(msg) + + data_scale = xscale + position_scale = yscale + + # understand `data` + if channel is None: + # assume 1D sequence or sequence of 1D sequences + try: + first_element = next(iter(data)) + except TypeError: + msg = "`data` should be 1D array or list of 1D arrays." + msg += " Specify `channel` to use ND array or list of ND" + msg += " arrays." + raise TypeError(msg) + + # promote singleton if necessary + try: + iter(first_element) # success => sequence of 1D sequences + data_length = len(data) + except TypeError: + data = [data] + data_length = 1 + else: + # assume ND sequence or sequence of ND sequences + try: + first_element = next(iter(data)) + first_element_first_element = next(iter(first_element)) + except TypeError: + msg = "`data` should be ND array or list of ND arrays." + msg += " Set `channel` to None to use 1D array or list of" + msg += " 1D arrays." + raise TypeError(msg) + + # promote singleton if necessary + try: + iter(first_element_first_element) # success => sequence of ND sequences + data_length = len(data) + except TypeError: + data = [data] + data_length = 1 + + # exctract channel + try: + data = [d[:,channel] for d in data] + except TypeError: + data = [[row[channel] for row in d] for d in data] + + # understand `positions` + if positions is None: + positions = np.arange(1,data_length+1, dtype=np.float) + if position_scale == 'log': + positions = 10**positions + positions_length = len(positions) + else: + try: + positions_length = len(positions) + except TypeError: + positions = [positions] + positions_length = 1 + + if positions_length != data_length: + msg = "`positions` must have the same length as `data`" + raise ValueError(msg) + + # calculate default limit of data axis if necessary. To do so, take min + # and max values of all data. + if (vert and ylim is None) or (not vert and xlim is None): + data_min = np.inf + data_max = -np.inf + for idx in range(data_length): + violin_data = np.array(data[idx], dtype=np.float).flat + violin_min = np.min(violin_data) + violin_max = np.max(violin_data) + if violin_min < data_min: + data_min = violin_min + if violin_max > data_max: + data_max = violin_max + data_lim = (data_min, data_max) + else: + data_lim = ylim if vert else xlim + + # calculate violin bin edges if necessary + if bin_edges is None: + if data_scale == 'logicle': + t = _LogicleTransform(data=data, channel=channel) + t_min = t.inverted().transform_non_affine(data_lim[0]) + t_max = t.inverted().transform_non_affine(data_lim[1]) + t_bin_edges = np.linspace(t_min, t_max, num_bins+1) + bin_edges = t.transform_non_affine(t_bin_edges) + elif data_scale == 'linear': + bin_edges = np.linspace(data_lim[0], data_lim[1], num_bins+1) + else: + bin_edges = np.logspace(np.log10(data_lim[0]), + np.log10(data_lim[1]), + num_bins+1) + + # set position=0 violin aside to be plotted separately if position axis is + # log + log_zero_data = None + log_zero_violin_kwargs = None + log_zero_draw_summary_stat_kwargs = None + log_zero_bin_edges = None + log_zero_upper_trim_fraction = None + log_zero_lower_trim_fraction = None + + if position_scale == 'log' and 0 in list(positions): + data = list(data) + positions = list(positions) + + zero_idx = [idx + for idx,pos in enumerate(positions) + if pos == 0] + + if len(zero_idx) > 1: + msg = "attempting to separately illustrate position=0 violin," + msg += " but found multiple instances" + raise ValueError(msg) + zero_idx = zero_idx[0] + + log_zero_data = data.pop(zero_idx) + del positions[zero_idx] + data_length = len(data) + positions_length = len(positions) + + # set aside position=0 violin parameters + if isinstance(violin_kwargs, collectionsAbc.Sequence): + violin_kwargs = list(violin_kwargs) + log_zero_violin_kwargs = violin_kwargs.pop(zero_idx) + else: + log_zero_violin_kwargs = violin_kwargs + + if isinstance(draw_summary_stat_kwargs, collectionsAbc.Sequence): + draw_summary_stat_kwargs = list(draw_summary_stat_kwargs) + log_zero_draw_summary_stat_kwargs = \ + draw_summary_stat_kwargs.pop(zero_idx) + else: + log_zero_draw_summary_stat_kwargs = draw_summary_stat_kwargs + + if bin_edges is not None: + try: + first_element = next(iter(bin_edges)) + try: + iter(first_element) # success => sequence of sequences + + bin_edges = list(bin_edges) + log_zero_bin_edges = bin_edges.pop(zero_idx) + except TypeError: + # sequence of scalars + log_zero_bin_edges = bin_edges + except TypeError: + msg = "`bin_edges` should be array or list of arrays" + raise TypeError(msg) + + if isinstance(upper_trim_fraction, collectionsAbc.Sequence): + upper_trim_fraction = list(upper_trim_fraction) + log_zero_upper_trim_fraction = upper_trim_fraction.pop(zero_idx) + else: + log_zero_upper_trim_fraction = upper_trim_fraction + + if isinstance(lower_trim_fraction, collectionsAbc.Sequence): + lower_trim_fraction = list(lower_trim_fraction) + log_zero_lower_trim_fraction = lower_trim_fraction.pop(zero_idx) + else: + log_zero_lower_trim_fraction = lower_trim_fraction + + # calculate violin_width and limit of position axis if necessary. To do + # so, pad position axis limit one violin_width away from extreme violin + # positions. + if (vert and xlim is None) or (not vert and ylim is None): + if violin_width is None: + if data_length == 1: + # edge case + violin_width = 0.5 + elif position_scale == 'log': + log_positions_span = np.log10(np.max(positions)) \ + - np.log10(np.min(positions)) + log_span = log_positions_span \ + / (1 - 2.0*violin_width_to_span_fraction) + violin_width = violin_width_to_span_fraction*log_span + else: + positions_span = np.max(positions) - np.min(positions) + span = positions_span \ + / (1 - 2.0*violin_width_to_span_fraction) + violin_width = violin_width_to_span_fraction*span + + if position_scale == 'log': + position_lim = (10**(np.log10(np.min(positions))-violin_width), + 10**(np.log10(np.max(positions))+violin_width)) + else: + position_lim = (np.min(positions)-violin_width, + np.max(positions)+violin_width) + else: + position_lim = xlim if vert else ylim + + if violin_width is None: + if position_scale == 'log': + log_span = np.log10(position_lim[1]) - np.log10(position_lim[0]) + violin_width = violin_width_to_span_fraction*log_span + else: + span = position_lim[1] - position_lim[0] + violin_width = violin_width_to_span_fraction*span + + ### + # plot violins + ### + for idx in range(data_length): + violin_position = positions[idx] + violin_data = np.array(data[idx], dtype=np.float).flat + + # understand violin_kwargs + if isinstance(violin_kwargs, collectionsAbc.Sequence): + v_kwargs = violin_kwargs[idx] + else: + v_kwargs = violin_kwargs + + # understand draw_summary_stat_kwargs + if isinstance(draw_summary_stat_kwargs, collectionsAbc.Sequence): + v_draw_summary_stat_kwargs = draw_summary_stat_kwargs[idx] + else: + v_draw_summary_stat_kwargs = draw_summary_stat_kwargs + + # understand bin_edges + try: + first_element = next(iter(bin_edges)) + try: + iter(first_element) # success => sequence of sequences + violin_bin_edges = bin_edges[idx] + except TypeError: + violin_bin_edges = bin_edges + except TypeError: + msg = "`bin_edges` should be array or list or arrays" + raise TypeError(msg) + + # understand upper and lower trim fractions + if isinstance(upper_trim_fraction, collectionsAbc.Sequence): + v_upper_trim_fraction = upper_trim_fraction[idx] + else: + v_upper_trim_fraction = upper_trim_fraction + + if isinstance(lower_trim_fraction, collectionsAbc.Sequence): + v_lower_trim_fraction = lower_trim_fraction[idx] + else: + v_lower_trim_fraction = lower_trim_fraction + + _plot_single_violin( + violin_position=violin_position, + violin_data=violin_data, + violin_width=violin_width, + violin_kwargs=v_kwargs, + bin_edges=violin_bin_edges, + density=density, + vert=vert, + scale=position_scale, + upper_trim_fraction=v_upper_trim_fraction, + lower_trim_fraction=v_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=v_draw_summary_stat_kwargs) + + ### + # plot zero violin if necessary + ### + data_position_lim = position_lim + if position_scale == 'log' and log_zero_data is not None: + next_violin_position = \ + 10**(np.log10(data_position_lim[0]) - violin_width) + + _plot_single_violin( + violin_position=next_violin_position, + violin_data=log_zero_data, + violin_width=violin_width, + violin_kwargs=log_zero_violin_kwargs, + bin_edges=log_zero_bin_edges, + density=density, + vert=vert, + scale=position_scale, + upper_trim_fraction=log_zero_upper_trim_fraction, + lower_trim_fraction=log_zero_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=log_zero_draw_summary_stat_kwargs) + + if draw_log_zero_divider: + if vert: + plt.axvline(10**(np.log10(next_violin_position)+violin_width), + **draw_log_zero_divider_kwargs) + else: + plt.axhline(10**(np.log10(next_violin_position)+violin_width), + **draw_log_zero_divider_kwargs) + + position_lim = (10**(np.log10(next_violin_position) - violin_width), + position_lim[1]) + + if xscale == 'logicle': + plt.xscale(xscale, data=data, channel=channel) + else: + plt.xscale(xscale) + if yscale == 'logicle': + plt.yscale(yscale, data=data, channel=channel) + else: + plt.yscale(yscale) + + if vert: + plt.xlim(position_lim) + plt.ylim(data_lim) + else: + plt.xlim(data_lim) + plt.ylim(position_lim) + + # set position axis locators and formatters + ax = plt.gca() + if position_scale == 'log': + if log_zero_data is not None: + next_violin_position = 10**(np.log10(data_position_lim[0]) - violin_width) + zero_tick_loc = next_violin_position + data_lim_min = data_position_lim[0] + else: + zero_tick_loc = None + data_lim_min = None + + major_locator = _ViolinLogLocator(zero_tick_loc=zero_tick_loc, + data_lim_min=data_lim_min) + minor_locator = _ViolinLogLocator(zero_tick_loc=None, + data_lim_min=data_lim_min, + subs='auto') + major_formatter = _ViolinLogFormatterSciNotation( + zero_tick_loc=zero_tick_loc, + zero_tick_label=log_zero_tick_label) + minor_formatter = _ViolinLogFormatterSciNotation( + zero_tick_loc=zero_tick_loc, + zero_tick_label=log_zero_tick_label) + + if vert: + ax.xaxis.set_major_locator(major_locator) + ax.xaxis.set_major_formatter(major_formatter) + ax.xaxis.set_minor_locator(minor_locator) + ax.xaxis.set_minor_formatter(minor_formatter) + else: + ax.yaxis.set_major_locator(major_locator) + ax.yaxis.set_major_formatter(major_formatter) + ax.yaxis.set_minor_locator(minor_locator) + ax.yaxis.set_minor_formatter(minor_formatter) + else: + data_lim_min = None + + major_locator = _ViolinAutoLocator(data_lim_min=data_lim_min) + major_formatter = _ViolinScalarFormatter() + + if vert: + ax.xaxis.set_major_locator(major_locator) + ax.xaxis.set_major_formatter(major_formatter) + else: + ax.yaxis.set_major_locator(major_locator) + ax.yaxis.set_major_formatter(major_formatter) + + if xlabel is not None: + plt.xlabel(xlabel) + if ylabel is not None: + plt.ylabel(ylabel) + if vert and ylabel is None and hasattr(data[-1], 'channels'): + plt.ylabel(data[-1].channels[0]) + elif not vert and xlabel is None and hasattr(data[-1], 'channels'): + plt.xlabel(data[-1].channels[0]) + + if title is not None: + plt.title(title) + + if savefig is not None: + plt.tight_layout() + plt.savefig(savefig, dpi=savefig_dpi) + plt.close() + +def violin_dose_response(data, + channel=None, + positions=None, + min_data=None, + max_data=None, + violin_width=None, + model_fxn=None, + xscale='linear', + yscale='logicle', + xlim=None, + ylim=None, + violin_width_to_span_fraction=0.1, + num_bins=100, + bin_edges=None, + density=False, + upper_trim_fraction=0.01, + lower_trim_fraction=0.01, + violin_kwargs=None, + draw_summary_stat=True, + draw_summary_stat_fxn=np.mean, + draw_summary_stat_kwargs=None, + log_zero_tick_label=None, + min_bin_edges=None, + min_upper_trim_fraction=0.01, + min_lower_trim_fraction=0.01, + min_violin_kwargs=None, + min_draw_summary_stat_kwargs=None, + draw_min_line=True, + draw_min_line_kwargs=None, + min_tick_label='Min', + max_bin_edges=None, + max_upper_trim_fraction=0.01, + max_lower_trim_fraction=0.01, + max_violin_kwargs=None, + max_draw_summary_stat_kwargs=None, + draw_max_line=True, + draw_max_line_kwargs=None, + max_tick_label='Max', + draw_model_kwargs=None, + draw_log_zero_divider=True, + draw_log_zero_divider_kwargs=None, + draw_minmax_divider=True, + draw_minmax_divider_kwargs=None, + xlabel=None, + ylabel=None, + title=None, + savefig=None): + """ + Plot violin plot with min data, max data, and mathematical model. + + Plot a violin plot (see `plot.violin()` description) with vertical violins + and separately illustrate a min violin, a max violin, and a mathematical + model. Useful for illustrating "dose response" or "transfer" functions, + which benefit from the added context of minimum and maximum bounds and + which are often described by mathematical models. Min and max violins are + illustrated to the left of the plot, and the mathematical model is + correctly illustrated even when a position=0 violin is illustrated + separately when `xscale` is 'log'. + + Parameters + ---------- + data : 1D or ND array or list of 1D or ND arrays + A population or collection of populations for which to plot violins. + If ND arrays are used (e.g., FCSData), `channel` must be specified. + channel : int or str, optional + Channel from `data` to plot. If specified, data are assumed to be ND + arrays. String channel specifications are only supported for data + types that support string-based indexing (e.g., FCSData). + positions : scalar or array, optional + Positions at which to center violins. + min_data : 1D or ND array, optional + A population representing a minimum control. This violin is separately + illustrated at the left of the plot. + max_data : 1D or ND array, optional + A population representing a maximum control. This violin is separately + illustrated at the left of the plot. + violin_width : scalar, optional + Width of violin. If `xscale` is 'log', the units are decades. If not + specified, `violin_width` is calculated from `xlim` and + `violin_width_to_span_fraction`. If only one violin is specified in + `data`, `violin_width` = 0.5. + model_fxn : function, optional + Function used to calculate model y-values. 100 x-values are linearly + (if `xscale` is 'linear') or logarithmically (if `xscale` is 'log') + generated spanning `xlim`. If `xscale` is 'log' and a position=0 + violin is specified, the result of model_fxn(0.0) is illustrated as a + horizontal line with the position=0 violin. + savefig : str, optional + The name of the file to save the figure to. If None, do not save. + + Other parameters + ---------------- + xscale : {'linear','log'}, optional + Scale of the x-axis. + yscale : {'logicle','linear','log'}, optional + Scale of the y-axis. + xlim : tuple, optional + Limits of the x-axis view. If not specified, `xlim` is calculated to + pad leftmost and rightmost violins with 0.5*`violin_width`. If + `violin_width` is also not specified, `violin_width` is calculated to + satisfy the 0.5*`violin_width` padding and + `violin_width_to_span_fraction`. + ylim : tuple, optional + Limits of the y-axis view. If not specified, `ylim` is calculated to + span all violins (before they are aesthetically trimmed). + violin_width_to_span_fraction : float, optional + Fraction of the x-axis span that a violin should span. Ignored if + `violin_width` is specified. + num_bins : int, optional + Number of bins to bin population members. Ignored if `bin_edges` is + specified. + bin_edges : array or list of arrays, optional + Bin edges used to bin population members for `data` violins. Bin edges + can be specified for individual violins using a list of arrays of the + same length as `data`. If not specified, `bin_edges` is calculated to + span `ylim` logicly (if `yscale` is 'logicle'), linearly (if `yscale` + is 'linear'), or logarithmically (if `yscale` is 'log') using + `num_bins`. + density : bool, optional + `density` parameter passed to the np.histogram() command that bins + population members for each violin. If True, violin width represents + relative frequency *density* instead of relative frequency (i.e., bins + are normalized by their width). + upper_trim_fraction : float or list of floats, optional + Fraction of members to trim (discard) from the top of the `data` + violins (e.g., for aesthetic purposes). Upper trim fractions can be + specified for individual violins using a list of floats of the same + length as `data`. + lower_trim_fraction : float or list of floats, optional + Fraction of members to trim (discard) from the bottom of the `data` + violins (e.g., for aesthetic purposes). Lower trim fractions can be + specified for individual violins using a list of floats of the same + length as `data`. + violin_kwargs : dict or list of dicts, optional + Keyword arguments passed to the plt.fill_betweenx() command that + illustrates the `data` violins. Keyword arguments can be specified for + individual violins using a list of dicts of the same length as `data`. + Default = {'facecolor':'gray', 'edgecolor':'black'}. + draw_summary_stat : bool, optional + Flag specifying to illustrate a summary statistic for each violin. + draw_summary_stat_fxn : function, optional + Function used to calculate the summary statistic for each violin. + Summary statistics are calculated prior to aesthetic trimming. + draw_summary_stat_kwargs : dict or list of dicts, optional + Keyword arguments passed to the plt.plot() command that illustrates + the `data` violin summary statistics. Keyword arguments can be + specified for individual violins using a list of dicts of the same + length as `data`. Default = {'color':'black'}. + log_zero_tick_label : str, optional + Label of position=0 violin tick if `xscale` is 'log'. Default is + generated by the default log tick formatter + (matplotlib.ticker.LogFormatterSciNotation) with x=0. + min_bin_edges : array, optional + Bin edges used to bin population members for the min violin. If not + specified, `min_bin_edges` is calculated to span `ylim` logicaly (if + `yscale` is 'logicle'), linearly (if `yscale` is 'linear'), or + logarithmically (if `yscale` is 'log') using `num_bins`. + min_upper_trim_fraction : float, optional + Fraction of members to trim (discard) from the top of the min violin. + min_lower_trim_fraction : float, optional + Fraction of members to trim (discard) from the bottom of the min + violin. + min_violin_kwargs : dict, optional + Keyword arguments passed to the plt.fill_betweenx() command that + illustrates the min violin. Default = {'facecolor':'black', + 'edgecolor':'black'}. + min_draw_summary_stat_kwargs : dict, optional + Keyword arguments passed to the plt.plot() command that illustrates + the min violin summary statistic. Default = {'color':'gray'}. + draw_min_line : bool, optional + Flag specifying to illustrate a line from the min violin summary + statistic across the plot. + draw_min_line_kwargs : dict, optional + Keyword arguments passed to the plt.plot() command that illustrates + the min violin line. Default = {'color':'gray', 'linestyle':'--', + 'zorder':-2}. + min_tick_label : str, optional + Label of min violin tick. Default='Min'. + max_bin_edges : array, optional + Bin edges used to bin population members for the max violin. If not + specified, `max_bin_edges` is calculated to span `ylim` logicaly (if + `yscale` is 'logicle'), linearly (if `yscale` is 'linear'), or + logarithmically (if `yscale` is 'log') using `num_bins`. + max_upper_trim_fraction : float, optional + Fraction of members to trim (discard) from the top of the max violin. + max_lower_trim_fraction : float, optional + Fraction of members to trim (discard) from the bottom of the max + violin. + max_violin_kwargs : dict, optional + Keyword arguments passed to the plt.fill_betweenx() command that + illustrates the max violin. Default = {'facecolor':'black', + 'edgecolor':'black'}. + max_draw_summary_stat_kwargs : dict, optional + Keyword arguments passed to the plt.plot() command that illustrates + the max violin summary statistic. Default = {'color':'gray'}. + draw_max_line : bool, optional + Flag specifying to illustrate a line from the max violin summary + statistic across the plot. + draw_max_line_kwargs : dict, optional + Keyword arguments passed to the plt.plot() command that illustrates + the max violin line. Default = {'color':'gray', 'linestyle':'--', + 'zorder':-2}. + max_tick_label : str, optional + Label of max violin tick. Default='Max'. + draw_model_kwargs : dict, optional + Keyword arguments passed to the plt.plot() command that + illustrates the model. Default = {'color':'gray', 'zorder':-1, + 'solid_capstyle':'butt'}. + draw_log_zero_divider : bool, optional + Flag specifying to illustrate a line separating the position=0 violin + from the `data` violins if `xscale` is 'log'. + draw_log_zero_divider_kwargs : dict, optional + Keyword arguments passed to the plt.axvline() command that + illustrates the position=0 violin divider. Default = {'color':'gray', + 'linestyle':':'}. + draw_minmax_divider : bool, optional + Flag specifying to illustrate a vertical line separating the min and + max violins from other violins. + draw_minmax_divider_kwargs : dict, optional + Keyword arguments passed to the plt.axvline() command that + illustrates the min/max divider. Default = {'color':'gray', + 'linestyle':'-'}. + xlabel : str, optional + Label to use on the x-axis. + ylabel : str, optional + Label to use on the y-axis. If None, channel name will be used if + possible (extracted from the last data object). + title : str, optional + Plot title. + + """ + + ### + # understand inputs + ### + + # populate default input values + if violin_kwargs is None: + violin_kwargs = {'facecolor':'gray', 'edgecolor':'black'} + if min_violin_kwargs is None: + min_violin_kwargs = {'facecolor':'black', 'edgecolor':'black'} + if max_violin_kwargs is None: + max_violin_kwargs = {'facecolor':'black', 'edgecolor':'black'} + + if draw_summary_stat_kwargs is None: + draw_summary_stat_kwargs = {'color':'black'} + if min_draw_summary_stat_kwargs is None: + min_draw_summary_stat_kwargs = {'color':'gray'} + if max_draw_summary_stat_kwargs is None: + max_draw_summary_stat_kwargs = {'color':'gray'} + + if draw_min_line_kwargs is None: + draw_min_line_kwargs = {'color':'gray', 'linestyle':'--', 'zorder':-2} + if draw_max_line_kwargs is None: + draw_max_line_kwargs = {'color':'gray', 'linestyle':'--', 'zorder':-2} + + if draw_model_kwargs is None: + draw_model_kwargs = {'color':'gray', + 'zorder':-1, + 'solid_capstyle':'butt'} + + if draw_log_zero_divider_kwargs is None: + draw_log_zero_divider_kwargs = {'color':'gray', 'linestyle':':'} + if draw_minmax_divider_kwargs is None: + draw_minmax_divider_kwargs = {'color':'gray', 'linestyle':'-'} + + # check x and y scales + if xscale not in ('linear', 'log'): + msg = "`xscale` must be 'linear' or 'log'" + raise ValueError(msg) + + if yscale not in ('logicle', 'linear', 'log'): + msg = "`yscale` must be 'logicle', 'linear', or 'log'" + raise ValueError(msg) + + # understand `data` + if channel is None: + # assume 1D sequence or sequence of 1D sequences + try: + first_element = next(iter(data)) + except TypeError: + msg = "`data` should be 1D sequence or sequence of 1D sequences." + msg += " Specify `channel` to use ND sequence or sequence of ND" + msg += " sequences." + raise TypeError(msg) + + # promote singleton if necessary + try: + iter(first_element) # success => sequence of 1D sequences + data_length = len(data) + except TypeError: + data = [data] + data_length = 1 + else: + # assume ND sequence or sequence of ND sequences + try: + first_element = next(iter(data)) + first_element_first_element = next(iter(first_element)) + except TypeError: + msg = "`data` should be ND sequence or sequence of ND sequences." + msg += " Set `channel` to None to use 1D sequence or sequence of" + msg += " 1D sequences." + raise TypeError(msg) + + # promote singleton if necessary + try: + iter(first_element_first_element) # success => sequence of ND sequences + data_length = len(data) + except TypeError: + data = [data] + data_length = 1 + + # exctract channel + try: + data = [d[:,channel] for d in data] + except TypeError: + data = [[row[channel] for row in d] for d in data] + if min_data is not None: + try: + min_data = min_data[:,channel] + except TypeError: + min_data = [row[channel] for row in min_data] + if max_data is not None: + try: + max_data = max_data[:,channel] + except TypeError: + max_data = [row[channel] for row in max_data] + + # understand `positions` + if positions is None: + positions = np.arange(1,data_length+1, dtype=np.float) + if xscale == 'log': + positions = 10**positions + positions_length = len(positions) + else: + try: + positions_length = len(positions) + except TypeError: + positions = [positions] + positions_length = 1 + + if positions_length != data_length: + msg = "`positions` must have the same length as `data`" + raise ValueError(msg) + + # calculate default ylim if necessary. To do so, take min and max values + # of all data. + all_data = list(data) + if min_data is not None: + all_data.append(min_data) + if max_data is not None: + all_data.append(max_data) + + if ylim is None: + ymin = np.inf + ymax = -np.inf + for idx in range(len(all_data)): + violin_data = np.array(all_data[idx], dtype=np.float).flat + violin_min = np.min(violin_data) + violin_max = np.max(violin_data) + if violin_min < ymin: + ymin = violin_min + if violin_max > ymax: + ymax = violin_max + ylim = (ymin, ymax) + + # calculate violin bin edges if necessary + if bin_edges is None: + if yscale == 'logicle': + t = _LogicleTransform(data=all_data, channel=channel) + t_ymin = t.inverted().transform_non_affine(ylim[0]) + t_ymax = t.inverted().transform_non_affine(ylim[1]) + t_bin_edges = np.linspace(t_ymin, t_ymax, num_bins+1) + bin_edges = t.transform_non_affine(t_bin_edges) + elif yscale == 'linear': + bin_edges = np.linspace(ylim[0], ylim[1], num_bins+1) + else: + bin_edges = np.logspace(np.log10(ylim[0]), + np.log10(ylim[1]), + num_bins+1) + if min_bin_edges is None: + if yscale == 'logicle': + t = _LogicleTransform(data=all_data, channel=channel) + t_ymin = t.inverted().transform_non_affine(ylim[0]) + t_ymax = t.inverted().transform_non_affine(ylim[1]) + t_min_bin_edges = np.linspace(t_ymin, t_ymax, num_bins+1) + min_bin_edges = t.transform_non_affine(t_min_bin_edges) + elif yscale == 'linear': + min_bin_edges = np.linspace(ylim[0], ylim[1], num_bins+1) + else: + min_bin_edges = np.logspace(np.log10(ylim[0]), + np.log10(ylim[1]), + num_bins+1) + if max_bin_edges is None: + if yscale == 'logicle': + t = _LogicleTransform(data=all_data, channel=channel) + t_ymin = t.inverted().transform_non_affine(ylim[0]) + t_ymax = t.inverted().transform_non_affine(ylim[1]) + t_max_bin_edges = np.linspace(t_ymin, t_ymax, num_bins+1) + max_bin_edges = t.transform_non_affine(t_max_bin_edges) + elif yscale == 'linear': + max_bin_edges = np.linspace(ylim[0], ylim[1], num_bins+1) + else: + max_bin_edges = np.logspace(np.log10(ylim[0]), + np.log10(ylim[1]), + num_bins+1) + + # set position=0 violin aside to be plotted separately if log x-axis + log_zero_data = None + log_zero_violin_kwargs = None + log_zero_draw_summary_stat_kwargs = None + log_zero_bin_edges = None + log_zero_upper_trim_fraction = None + log_zero_lower_trim_fraction = None + + if xscale == 'log' and 0 in list(positions): + data = list(data) + positions = list(positions) + + zero_idx = [idx + for idx,pos in enumerate(positions) + if pos == 0] + + if len(zero_idx) > 1: + msg = "attempting to separately illustrate position=0 violin," + msg += " but found multiple instances" + raise ValueError(msg) + zero_idx = zero_idx[0] + + log_zero_data = data.pop(zero_idx) + del positions[zero_idx] + data_length = len(data) + positions_length = len(positions) + + # set aside position=0 violin parameters + if isinstance(violin_kwargs, collectionsAbc.Sequence): + violin_kwargs = list(violin_kwargs) + log_zero_violin_kwargs = violin_kwargs.pop(zero_idx) + else: + log_zero_violin_kwargs = violin_kwargs + + if isinstance(draw_summary_stat_kwargs, collectionsAbc.Sequence): + draw_summary_stat_kwargs = list(draw_summary_stat_kwargs) + log_zero_draw_summary_stat_kwargs = \ + draw_summary_stat_kwargs.pop(zero_idx) + else: + log_zero_draw_summary_stat_kwargs = draw_summary_stat_kwargs + + if bin_edges is not None: + try: + first_element = next(iter(bin_edges)) + try: + iter(first_element) # success => sequence of sequences + + bin_edges = list(bin_edges) + log_zero_bin_edges = bin_edges.pop(zero_idx) + except TypeError: + # sequence of scalars + log_zero_bin_edges = bin_edges + except TypeError: + msg = "`bin_edges` should be array or list of arrays" + raise TypeError(msg) + + if isinstance(upper_trim_fraction, collectionsAbc.Sequence): + upper_trim_fraction = list(upper_trim_fraction) + log_zero_upper_trim_fraction = upper_trim_fraction.pop(zero_idx) + else: + log_zero_upper_trim_fraction = upper_trim_fraction + + if isinstance(lower_trim_fraction, collectionsAbc.Sequence): + lower_trim_fraction = list(lower_trim_fraction) + log_zero_lower_trim_fraction = lower_trim_fraction.pop(zero_idx) + else: + log_zero_lower_trim_fraction = lower_trim_fraction + + # calculate xlim and violin_width if necessary. To do so, pad xlim one + # violin_width away from extreme violin positions. + if xlim is None: + if violin_width is None: + if data_length == 1: + # edge case + violin_width = 0.5 + elif xscale == 'log': + log_positions_span = np.log10(np.max(positions)) \ + - np.log10(np.min(positions)) + log_span = log_positions_span \ + / (1 - 2.0*violin_width_to_span_fraction) + violin_width = violin_width_to_span_fraction*log_span + else: + positions_span = np.max(positions) - np.min(positions) + span = positions_span \ + / (1 - 2.0*violin_width_to_span_fraction) + violin_width = violin_width_to_span_fraction*span + + if xscale == 'log': + xlim = (10**(np.log10(np.min(positions))-violin_width), + 10**(np.log10(np.max(positions))+violin_width)) + else: + xlim = (np.min(positions)-violin_width, + np.max(positions)+violin_width) + + if violin_width is None: + if xscale == 'log': + log_span = np.log10(xlim[1]) - np.log10(xlim[0]) + violin_width = violin_width_to_span_fraction*log_span + else: + span = xlim[1] - xlim[0] + violin_width = violin_width_to_span_fraction*span + + ### + # plot violins + ### + for idx in range(data_length): + violin_position = positions[idx] + violin_data = np.array(data[idx], dtype=np.float).flat + + # understand violin_kwargs + if isinstance(violin_kwargs, collectionsAbc.Sequence): + v_kwargs = violin_kwargs[idx] + else: + v_kwargs = violin_kwargs + + # understand draw_summary_stat_kwargs + if isinstance(draw_summary_stat_kwargs, collectionsAbc.Sequence): + v_draw_summary_stat_kwargs = draw_summary_stat_kwargs[idx] + else: + v_draw_summary_stat_kwargs = draw_summary_stat_kwargs + + # understand bin_edges + try: + first_element = next(iter(bin_edges)) + try: + iter(first_element) # success => sequence of sequences + violin_bin_edges = bin_edges[idx] + except TypeError: + violin_bin_edges = bin_edges + except TypeError: + msg = "`bin_edges` should be array or list or arrays" + raise TypeError(msg) + + # understand upper and lower trim fractions + if isinstance(upper_trim_fraction, collectionsAbc.Sequence): + v_upper_trim_fraction = upper_trim_fraction[idx] + else: + v_upper_trim_fraction = upper_trim_fraction + + if isinstance(lower_trim_fraction, collectionsAbc.Sequence): + v_lower_trim_fraction = lower_trim_fraction[idx] + else: + v_lower_trim_fraction = lower_trim_fraction + + _plot_single_violin( + violin_position=violin_position, + violin_data=violin_data, + violin_width=violin_width, + violin_kwargs=v_kwargs, + bin_edges=violin_bin_edges, + density=density, + vert=True, + scale=xscale, + upper_trim_fraction=v_upper_trim_fraction, + lower_trim_fraction=v_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=v_draw_summary_stat_kwargs) + + if model_fxn is not None: + if xscale == 'log': + model_xvalues = np.logspace(np.log10(xlim[0]), + np.log10(xlim[1]), + 100) + else: + model_xvalues = np.linspace(xlim[0], xlim[1], 100) + + try: + model_yvalues = model_fxn(model_xvalues) + except Exception: + model_yvalues = [model_fxn(xvalue) + for xvalue in model_xvalues] + + plt.plot(model_xvalues, + model_yvalues, + **draw_model_kwargs) + + ### + # plot zero, min, and max violins if necessary + ### + data_xlim = xlim + if xscale == 'log': + next_violin_position = \ + 10**(np.log10(data_xlim[0]) - violin_width) + else: + next_violin_position = data_xlim[0] - violin_width + + if xscale == 'log' and log_zero_data is not None: + _plot_single_violin( + violin_position=next_violin_position, + violin_data=log_zero_data, + violin_width=violin_width, + violin_kwargs=log_zero_violin_kwargs, + bin_edges=log_zero_bin_edges, + density=density, + vert=True, + scale=xscale, + upper_trim_fraction=log_zero_upper_trim_fraction, + lower_trim_fraction=log_zero_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=log_zero_draw_summary_stat_kwargs) + + if model_fxn is not None: + model_zero_yvalue = model_fxn(0.0) + plt.plot([10**(np.log10(next_violin_position)-violin_width), + 10**(np.log10(next_violin_position)+violin_width)], + [model_zero_yvalue, model_zero_yvalue], + **draw_model_kwargs) + + if draw_log_zero_divider: + plt.axvline(10**(np.log10(next_violin_position) + violin_width), + **draw_log_zero_divider_kwargs) + + xlim = (10**(np.log10(next_violin_position) - violin_width), + xlim[1]) + + next_violin_position = \ + 10**(np.log10(next_violin_position) - 2*violin_width) + + if max_data is not None: + _plot_single_violin( + violin_position=next_violin_position, + violin_data=max_data, + violin_width=violin_width, + violin_kwargs=max_violin_kwargs, + bin_edges=max_bin_edges, + density=density, + vert=True, + scale=xscale, + upper_trim_fraction=max_upper_trim_fraction, + lower_trim_fraction=max_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=max_draw_summary_stat_kwargs) + + if draw_max_line: + summary_stat = draw_summary_stat_fxn(max_data) + plt.plot([next_violin_position, xlim[1]], + [summary_stat, summary_stat], + **draw_max_line_kwargs) + + if draw_minmax_divider: + if xscale == 'log': + plt.axvline(10**(np.log10(next_violin_position) + violin_width), + **draw_minmax_divider_kwargs) + else: + plt.axvline(next_violin_position + violin_width, + **draw_minmax_divider_kwargs) + + if xscale == 'log': + xlim = (10**(np.log10(next_violin_position) - violin_width), + xlim[1]) + else: + xlim = (next_violin_position - violin_width, + xlim[1]) + + if xscale == 'log': + next_violin_position = \ + 10**(np.log10(next_violin_position) - 2*violin_width) + else: + next_violin_position = next_violin_position - 2*violin_width + + if min_data is not None: + _plot_single_violin( + violin_position=next_violin_position, + violin_data=min_data, + violin_width=violin_width, + violin_kwargs=min_violin_kwargs, + bin_edges=min_bin_edges, + density=density, + vert=True, + scale=xscale, + upper_trim_fraction=min_upper_trim_fraction, + lower_trim_fraction=min_lower_trim_fraction, + draw_summary_stat=draw_summary_stat, + draw_summary_stat_fxn=draw_summary_stat_fxn, + draw_summary_stat_kwargs=min_draw_summary_stat_kwargs) + + if draw_min_line: + summary_stat = draw_summary_stat_fxn(min_data) + plt.plot([next_violin_position, xlim[1]], + [summary_stat, summary_stat], + **draw_min_line_kwargs) + + if draw_minmax_divider and max_data is None: + if xscale == 'log': + plt.axvline(10**(np.log10(next_violin_position) + violin_width), + **draw_minmax_divider_kwargs) + else: + plt.axvline(next_violin_position + violin_width, + **draw_minmax_divider_kwargs) + + if xscale == 'log': + xlim = (10**(np.log10(next_violin_position) - violin_width), + xlim[1]) + else: + xlim = (next_violin_position - violin_width, + xlim[1]) + + plt.xscale(xscale) + if yscale == 'logicle': + plt.yscale(yscale, data=all_data, channel=channel) + else: + plt.yscale(yscale) + + plt.xlim(xlim) + plt.ylim(ylim) + + # set x-tick locators and formatters + ax = plt.gca() + if xscale == 'log': + next_violin_position = 10**(np.log10(data_xlim[0]) - violin_width) + if log_zero_data is not None: + zero_tick_loc = next_violin_position + next_violin_position = \ + 10**(np.log10(next_violin_position) - 2*violin_width) + else: + zero_tick_loc = None + if max_data is not None: + max_tick_loc = next_violin_position + next_violin_position = \ + 10**(np.log10(next_violin_position) - 2*violin_width) + else: + max_tick_loc = None + min_tick_loc = None if min_data is None else next_violin_position + if min_data is not None or max_data is not None \ + or log_zero_data is not None: + data_lim_min = data_xlim[0] + else: + data_lim_min = None + + major_locator = _ViolinLogLocator(min_tick_loc=min_tick_loc, + max_tick_loc=max_tick_loc, + zero_tick_loc=zero_tick_loc, + data_lim_min=data_lim_min) + minor_locator = _ViolinLogLocator(min_tick_loc=None, + max_tick_loc=None, + zero_tick_loc=None, + data_lim_min=data_lim_min, + subs='auto') + major_formatter = _ViolinLogFormatterSciNotation( + min_tick_loc=min_tick_loc, + max_tick_loc=max_tick_loc, + zero_tick_loc=zero_tick_loc, + min_tick_label=min_tick_label, + max_tick_label=max_tick_label, + zero_tick_label=log_zero_tick_label) + minor_formatter = _ViolinLogFormatterSciNotation( + min_tick_loc=min_tick_loc, + max_tick_loc=max_tick_loc, + zero_tick_loc=zero_tick_loc, + min_tick_label=min_tick_label, + max_tick_label=max_tick_label, + zero_tick_label=log_zero_tick_label) + + ax.xaxis.set_major_locator(major_locator) + ax.xaxis.set_major_formatter(major_formatter) + ax.xaxis.set_minor_locator(minor_locator) + ax.xaxis.set_minor_formatter(minor_formatter) + else: + next_violin_position = data_xlim[0] - violin_width + if max_data is not None: + max_tick_loc = next_violin_position + next_violin_position -= 2*violin_width + else: + max_tick_loc = None + min_tick_loc = None if min_data is None else next_violin_position + if min_data is not None or max_data is not None: + data_lim_min = data_xlim[0] + else: + data_lim_min = None + + major_locator = _ViolinAutoLocator(min_tick_loc=min_tick_loc, + max_tick_loc=max_tick_loc, + data_lim_min=data_lim_min) + major_formatter = _ViolinScalarFormatter(min_tick_loc=min_tick_loc, + max_tick_loc=max_tick_loc, + min_tick_label=min_tick_label, + max_tick_label=max_tick_label) + + ax.xaxis.set_major_locator(major_locator) + ax.xaxis.set_major_formatter(major_formatter) + + if xlabel is not None: + plt.xlabel(xlabel) + + if ylabel is not None: + # Highest priority is user-provided label + plt.ylabel(ylabel) + elif hasattr(data[-1], 'channels'): + # Attempt to use channel name + plt.ylabel(data[-1].channels[0]) + + if title is not None: + plt.title(title) + + if savefig is not None: + plt.tight_layout() + plt.savefig(savefig, dpi=savefig_dpi) + plt.close() + def density2d(data, channels=[0,1], bins=1024, diff --git a/doc/_static/img/excel_ui/input_beads.png b/doc/_static/img/excel_ui/input_beads.png new file mode 100755 index 0000000..a007b78 Binary files /dev/null and b/doc/_static/img/excel_ui/input_beads.png differ diff --git a/doc/_static/img/excel_ui/input_instruments.png b/doc/_static/img/excel_ui/input_instruments.png new file mode 100755 index 0000000..9facc3a Binary files /dev/null and b/doc/_static/img/excel_ui/input_instruments.png differ diff --git a/doc/_static/img/excel_ui/input_samples.png b/doc/_static/img/excel_ui/input_samples.png new file mode 100755 index 0000000..a42dc80 Binary files /dev/null and b/doc/_static/img/excel_ui/input_samples.png differ diff --git a/doc/_static/img/excel_ui/output_beads_clustering.png b/doc/_static/img/excel_ui/output_beads_clustering.png new file mode 100644 index 0000000..6fae872 Binary files /dev/null and b/doc/_static/img/excel_ui/output_beads_clustering.png differ diff --git a/doc/_static/img/excel_ui/output_beads_density.png b/doc/_static/img/excel_ui/output_beads_density.png new file mode 100644 index 0000000..1d85c28 Binary files /dev/null and b/doc/_static/img/excel_ui/output_beads_density.png differ diff --git a/doc/_static/img/excel_ui/output_beads_populations.png b/doc/_static/img/excel_ui/output_beads_populations.png new file mode 100644 index 0000000..966b12e Binary files /dev/null and b/doc/_static/img/excel_ui/output_beads_populations.png differ diff --git a/doc/_static/img/excel_ui/output_beads_sc.png b/doc/_static/img/excel_ui/output_beads_sc.png new file mode 100644 index 0000000..c414f9a Binary files /dev/null and b/doc/_static/img/excel_ui/output_beads_sc.png differ diff --git a/doc/_static/img/excel_ui/output_sample.png b/doc/_static/img/excel_ui/output_sample.png new file mode 100755 index 0000000..0c89b80 Binary files /dev/null and b/doc/_static/img/excel_ui/output_sample.png differ diff --git a/doc/_static/img/excel_ui/output_spreadsheet_about.png b/doc/_static/img/excel_ui/output_spreadsheet_about.png new file mode 100755 index 0000000..087cd74 Binary files /dev/null and b/doc/_static/img/excel_ui/output_spreadsheet_about.png differ diff --git a/doc/_static/img/excel_ui/output_spreadsheet_beads.png b/doc/_static/img/excel_ui/output_spreadsheet_beads.png new file mode 100755 index 0000000..15cab7b Binary files /dev/null and b/doc/_static/img/excel_ui/output_spreadsheet_beads.png differ diff --git a/doc/_static/img/excel_ui/output_spreadsheet_histograms.png b/doc/_static/img/excel_ui/output_spreadsheet_histograms.png new file mode 100755 index 0000000..4c1da67 Binary files /dev/null and b/doc/_static/img/excel_ui/output_spreadsheet_histograms.png differ diff --git a/doc/_static/img/excel_ui/output_spreadsheet_samples.png b/doc/_static/img/excel_ui/output_spreadsheet_samples.png new file mode 100755 index 0000000..f0dc5ad Binary files /dev/null and b/doc/_static/img/excel_ui/output_spreadsheet_samples.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_calibration_1.png b/doc/_static/img/fundamentals/fundamentals_calibration_1.png new file mode 100644 index 0000000..14f9fb0 Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_calibration_1.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_calibration_2.png b/doc/_static/img/fundamentals/fundamentals_calibration_2.png new file mode 100644 index 0000000..67a10b8 Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_calibration_2.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_calibration_3.png b/doc/_static/img/fundamentals/fundamentals_calibration_3.png new file mode 100644 index 0000000..c810468 Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_calibration_3.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_calibration_4.png b/doc/_static/img/fundamentals/fundamentals_calibration_4.png new file mode 100644 index 0000000..f50316c Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_calibration_4.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_calibration_5.png b/doc/_static/img/fundamentals/fundamentals_calibration_5.png new file mode 100644 index 0000000..f5b4208 Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_calibration_5.png differ diff --git a/doc/_static/img/fundamentals/fundamentals_density_1.png b/doc/_static/img/fundamentals/fundamentals_density_1.png new file mode 100644 index 0000000..5fea780 Binary files /dev/null and b/doc/_static/img/fundamentals/fundamentals_density_1.png differ diff --git a/doc/_static/img/getting_started/installation_completed.png b/doc/_static/img/getting_started/installation_completed.png new file mode 100755 index 0000000..0624a76 Binary files /dev/null and b/doc/_static/img/getting_started/installation_completed.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_gate_density_1.png b/doc/_static/img/python_tutorial/python_tutorial_gate_density_1.png new file mode 100644 index 0000000..46c4589 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_gate_density_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_gate_density_2.png b/doc/_static/img/python_tutorial/python_tutorial_gate_density_2.png new file mode 100644 index 0000000..ba17198 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_gate_density_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_gate_ellipse_1.png b/doc/_static/img/python_tutorial/python_tutorial_gate_ellipse_1.png new file mode 100644 index 0000000..f3ddd8e Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_gate_ellipse_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_1.png b/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_1.png new file mode 100644 index 0000000..28cf2d4 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_2.png b/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_2.png new file mode 100644 index 0000000..7b5bb11 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_gate_high_low_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_1.png b/doc/_static/img/python_tutorial/python_tutorial_mef_1.png new file mode 100644 index 0000000..b225b61 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_2.png b/doc/_static/img/python_tutorial/python_tutorial_mef_2.png new file mode 100644 index 0000000..f274bfb Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_3.png b/doc/_static/img/python_tutorial/python_tutorial_mef_3.png new file mode 100644 index 0000000..f80aa66 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_3.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_4.png b/doc/_static/img/python_tutorial/python_tutorial_mef_4.png new file mode 100644 index 0000000..63eef66 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_4.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_5.png b/doc/_static/img/python_tutorial/python_tutorial_mef_5.png new file mode 100644 index 0000000..6371390 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_5.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_mef_6.png b/doc/_static/img/python_tutorial/python_tutorial_mef_6.png new file mode 100644 index 0000000..8c9730d Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_mef_6.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_1.png b/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_1.png new file mode 100644 index 0000000..3c7fad6 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_2.png b/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_2.png new file mode 100644 index 0000000..1807c39 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_density_2d_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_density_and_hist_1.png b/doc/_static/img/python_tutorial/python_tutorial_plot_density_and_hist_1.png new file mode 100644 index 0000000..e8b5886 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_density_and_hist_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_1.png b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_1.png new file mode 100644 index 0000000..4d5993c Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_2.png b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_2.png new file mode 100644 index 0000000..39f8a46 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_3.png b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_3.png new file mode 100644 index 0000000..e3dbba0 Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_hist1d_3.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_violin_1.png b/doc/_static/img/python_tutorial/python_tutorial_plot_violin_1.png new file mode 100644 index 0000000..a09622d Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_violin_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_plot_violin_2.png b/doc/_static/img/python_tutorial/python_tutorial_plot_violin_2.png new file mode 100644 index 0000000..c9a655a Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_plot_violin_2.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_transform_1.png b/doc/_static/img/python_tutorial/python_tutorial_transform_1.png new file mode 100644 index 0000000..8ef8d8d Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_transform_1.png differ diff --git a/doc/_static/img/python_tutorial/python_tutorial_transform_2.png b/doc/_static/img/python_tutorial/python_tutorial_transform_2.png new file mode 100644 index 0000000..b011ecc Binary files /dev/null and b/doc/_static/img/python_tutorial/python_tutorial_transform_2.png differ diff --git a/doc/contribute/how_to.rst b/doc/contribute/how_to.rst index 04097e3..122636b 100644 --- a/doc/contribute/how_to.rst +++ b/doc/contribute/how_to.rst @@ -27,4 +27,24 @@ A recommended workflow for contributing to ``FlowCal`` is as follows: 5. Set up your virtual environment, if desired. 6. Install ``FlowCal`` in developer mode, using ``python setup.py develop``. 7. Write/test code, commit. Repeat until feature is fully implemented. -8. Push and submit a merge request towards ``develop``. \ No newline at end of file +8. Push and submit a merge request towards ``develop``. + +Version Policy +-------------- +The version number in ``FlowCal`` is organized as follows: ``MAJOR.MINOR.PATCH``. The following are guidelines on how to manage version numbers: + +* The patch version number should only be increased after fixing a bug or an incompatibility issue, if the public API was not modified at all. + +* The minor version number should be increased after a relatively minor API modification. For example: + + * After fixing a bug, when a minor API modification was required to do so. + * After making a small adjustment to a function signature, such as adding a new argument or changing the data type of an existing one. + * After adding one or more relatively minor new features (e.g. a new plotting function). + +* The major version number should be increased after a fundamental modification to the API and/or the package, or the introduction of a major feature. For example: + + * After completely reorganizing the FCSData object or the functions in the package + * After introducing a new Excel UI with a completely reorganized input file format. + * After introducing a Graphical User Interface. + +In general, new patch versions should not break a user's code, whereas minor versions should not require more than minor adjustments. Major versions could either require significant changes in the user's code or a complete change in the way they think about ``FlowCal``'s API. \ No newline at end of file diff --git a/doc/excel_ui/input_format.rst b/doc/excel_ui/input_format.rst index 9b2cadf..d65be52 100644 --- a/doc/excel_ui/input_format.rst +++ b/doc/excel_ui/input_format.rst @@ -12,7 +12,7 @@ Instruments sheet This sheet must be filled with basic information about the flow cytometer used to acquire the samples. Each row represents an instrument. Typically, the user would only need to specify one instrument. However, ``FlowCal`` allows the simultaneous processing of samples taken with different instruments. The figure below shows an example of an **Instruments** sheet. -.. image:: https://www.dropbox.com/s/jgmgbgc2rbwagle/input_instruments.png?raw=1 +.. image:: /_static/img/excel_ui/input_instruments.png For each row, the following columns must be filled. @@ -28,7 +28,7 @@ Beads sheet This sheet contains details about calibration microbeads and how to process them. Each row represents a different sample of beads. The figure below shows an example of an **Beads** sheet. -.. image:: https://www.dropbox.com/s/e4snspj3w8yiqpl/input_beads.png?raw=1 +.. image:: /_static/img/excel_ui/input_beads.png For each row, the following columns must be filled: @@ -46,7 +46,7 @@ Samples sheet In this sheet, the user specifies cell samples and tells ``FlowCal`` how to process them. Each row contains the information used in the analysis of one FCS file. One file can be analyzed several times with different options (e.g. gating fractions or fluorescence units) by adding more rows that reference the same file. The figure below shows an example of a **Samples** sheet. -.. image:: https://www.dropbox.com/s/6c5b9lme2eg7iwx/input_samples.png?raw=1 +.. image:: /_static/img/excel_ui/input_samples.png For each row, the following columns must be filled: @@ -61,6 +61,6 @@ For each row, the following columns must be filled: c. **MEF**: MEF units. 6. **Gate Fraction** (F): Fraction of samples to keep when performing :doc:`density gating`. -Additional columns, such as **Strain**, **Plasmid**, and **IPTG (mM)** (columns G, H, and I), can be added in any place for the user’s records, and will be copied unmodified to the output Excel file by ``FlowCal``. +Additional columns, such as **Strain**, **Plasmid**, and **DAPG (uM)** (columns G, H, and I), can be added in any place for the user’s records, and will be copied unmodified to the output Excel file by ``FlowCal``. -.. warning:: If MEF units are requested for a fluorescence channel of a sample, an FCS file with calibration beads data should be specified in the **Beads ID** column. Both beads and samples should have been acquired at the same settings for the specified fluorescence channel, otherwise ``FlowCal`` will throw an error. \ No newline at end of file +.. warning:: If MEF units are requested for a fluorescence channel of a sample, an FCS file with calibration beads data should be specified in the **Beads ID** column. Both beads and samples should have been acquired at the same settings for the specified fluorescence channel, otherwise ``FlowCal`` will throw an error. diff --git a/doc/excel_ui/outputs.rst b/doc/excel_ui/outputs.rst index b7487dc..281bf73 100644 --- a/doc/excel_ui/outputs.rst +++ b/doc/excel_ui/outputs.rst @@ -12,29 +12,29 @@ Plots a. ``density_hist_.png``: A forward/side scatter 2D density diagram of the calibration particle sample, and a histogram for each relevant fluorescence channel. - .. image:: https://www.dropbox.com/s/qbnudtz1cfej7wg/output_beads_density.png?raw=1 + .. image:: /_static/img/excel_ui/output_beads_density.png b. ``clustering_.png``: A plot of the sub-populations identified during the clustering step, where the different sub-populations are shown in different colors. Depending on the number of channels used for clustering, this plot is a histogram (when using only one channel), a 2D scatter plot (when using two channels), or a 3D scatter plot with three 2D projections (when using three channels or more). - .. image:: https://www.dropbox.com/s/omvr8t6qatuo64v/output_beads_clustering.png?raw=1 + .. image:: /_static/img/excel_ui/output_beads_clustering.png .. note:: It is normally easy to distinguish the different bead populations in this plot, and the different colors should correspond to this expectation. If the populations have been identified incorrectly, changing the number of channels used for clustering or the density gate fraction can improve the results. These two parameters can be changed in the **Beads** sheet of the input Excel file. c. ``populations__.png``: A histogram showing the identified microbead sub-populations in different colors, for each fluorescence channel in which a MEF standard curve is to be calculated. In addition, a vertical line is shown representing the median of each population, which is later used to calculate the standard curve. Sub-populations that were not used to generate the standard curve are shown in gray. - .. image:: https://www.dropbox.com/s/j8ac6reg6uxbbz5/output_beads_populations.png?raw=1 + .. image:: /_static/img/excel_ui/output_beads_populations.png .. note:: All populations should be unimodal. Bimodal populations indicate incorrect clustering. This can be fixed by changing the number of channels used for clustering or the density gate fraction in the **Beads** sheet of the input Excel file. d. ``std_crv__.png``: A plot of the fitted standard curve, for each channel in which MEF values were specified. - .. image:: https://www.dropbox.com/s/e1mp3woslx32rev/output_beads_sc.png?raw=1 + .. image:: /_static/img/excel_ui/output_beads_sc.png .. note:: All the blue dots should line almost perfectly on the green line, otherwise the estimation of the standard curve might not be good. If this is not the case, you should make sure that clustering is being performed correctly by looking at the previous plots. If one dot differs significantly from the curve despite perfect clustering, you might want to manually remove it. This can be done by replacing its MEF value with the word "None" in the **Beads** sheet of the input Excel file. 2. The folder ``plot_samples`` contains plots of the experimental cell samples. Each experimental sample of name “ID” as specified in the Excel input sheet results in a file named ``.png``. This image contains a forward/side scatter 2D density diagram with the gated region indicated, and a histogram for each user-specified fluorescence channel. -.. image:: https://www.dropbox.com/s/mlkc6t22bzxz9k0/output_sample.png?raw=1 +.. image:: /_static/img/excel_ui/output_sample.png .. _excel-ui-outputs-excel: @@ -47,16 +47,16 @@ In both sheets, the number of events after gating and the acquisition time are r Statistics per beads file, per fluorescence channel include: the channel gain, the amplifier type, the equation of the beads fluorescence model used, and the values of the fitted parameters. -.. image:: https://www.dropbox.com/s/grgops0e5pt03ci/output_spreadsheet_beads.png?raw=1 +.. image:: /_static/img/excel_ui/output_spreadsheet_beads.png Statistics per cell sample, per fluorescence channel include: channel gain, mean, geometric mean, median, mode, arithmetic and geometric standard deviation, arithmetic and geometric coefficient of variation (CV), interquartile range (IQR), and robust coefficient of variation (RCV). Note that if an error has been found, the **Analysis Notes** field will be populated, and statistics and plots will not be reported. -.. image:: https://www.dropbox.com/s/kjxpln97s42low8/output_spreadsheet_samples.png?raw=1 +.. image:: /_static/img/excel_ui/output_spreadsheet_samples.png In addition, a **Histograms** tab is generated, with bin/counts pairs for each sample and relevant fluorescence channel in the specified units. -.. image:: https://www.dropbox.com/s/t4xq6rbm6jpri9d/output_spreadsheet_histograms.png?raw=1 +.. image:: /_static/img/excel_ui/output_spreadsheet_histograms.png One last tab named **About Analysis** is added with information about the corresponding input Excel file, the date and time of the run, and the FlowCal version used. -.. image:: https://www.dropbox.com/s/hyi7k97cofqiibl/output_spreadsheet_about.png?raw=1 +.. image:: /_static/img/excel_ui/output_spreadsheet_about.png diff --git a/doc/fundamentals/calibration.rst b/doc/fundamentals/calibration.rst index c9def81..cb42197 100644 --- a/doc/fundamentals/calibration.rst +++ b/doc/fundamentals/calibration.rst @@ -28,7 +28,7 @@ To perform MEF calibration, the following steps are typically followed: Calibration beads must be measured in every experiment, using the same acquisition settings as when measuring cell samples. The figure below shows typical flow cytometry data from calibration beads. -.. image:: https://www.dropbox.com/s/z4atwyiba8wy3b1/fundamentals_calibration_1.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_calibration_1.png The top subfigure shows data from the forward/side scatter channels, whereas the bottom one shows one of the fluorescence channels. Note how several populations with different fluorescence values are evident in the bottom plot. @@ -37,27 +37,27 @@ The top subfigure shows data from the forward/side scatter channels, whereas the Notice, in the figure above, that two different populations are present in the forward/side scatter plot. The faint population on the right/upper portion of the plot corresponds to bead aggregates. These are obviously undesired, as we are only interested in single bead fluorescence. This sort of situation is normally dealt with by "gating", which involves manually drawing a region of interest and retaining the events that fall inside. ``FlowCal`` performs :doc:`density gating`, an automated procedure to eliminate aggregates and other events that are clearly different from the main population of interest. The figure below shows a black contour surrounding the region identified by density gating in the forward/side scatter plot, showing that density gating can distinguish single beads from aggregates. Notice also how small peaks in the fluorescence plot disappear after density-gating, which is consistent with the eliminated population being composed of agglomerations of multiple beads. -.. image:: https://www.dropbox.com/s/95ph8z8ajdn5wta/fundamentals_calibration_2.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_calibration_2.png 3. Identification of Bead Subpopulations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In order to calculate the average fluorescence of each subpopulation, the individual events corresponding to each must first be identified. The figure below shows one of the plots produced by ``FlowCal`` after an automated clustering algorithm has properly identified each subpopulation. Note how this can be achieved using information from several fluorescence channels at the same time. -.. image:: https://www.dropbox.com/s/x0gnbrhzp51ljum/fundamentals_calibration_3.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_calibration_3.png Next, the average fluorescence of each subpopulation is calculated. Some subpopulations, however, can have fluorescence values that are outside the limit of detection of the instrument, and therefore their events will show saturated fluorescence values. These subpopulations should not be considered further in the analysis. ``FlowCal`` discards these automatically. The figure below shows the individual subpopulations with a vertical line representing their median fluorescence. In addition, subpopulations that were automatically discarded are shown colored in gray. -.. image:: https://www.dropbox.com/s/r9n8s96fmj7runm/fundamentals_calibration_4.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_calibration_4.png 4. Calculation of a Standard Curve ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Having the fluorescence of the individual populations, as measured by the flow cytometer, and the MEF values provided by the manufacturer, a standard curve can be calculated to transform fluorescence of any event to MEF. The figure below shows an example of such a standard curve. ``FlowCal`` uses the concept of a "bead fluorescence model", which is directly fitted to bead data but not immediately applicable to cells. However, some small mathematical manipulations turn this bead fluorescence model into a standard curve that is readily applicable to cells. -.. image:: https://www.dropbox.com/s/9c6ibfo0vxa9j1a/fundamentals_calibration_5.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_calibration_5.png 5. Conversion of Cell Fluorescence to MEF ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/fundamentals/density_gate.rst b/doc/fundamentals/density_gate.rst index 2049e04..8b7a557 100644 --- a/doc/fundamentals/density_gate.rst +++ b/doc/fundamentals/density_gate.rst @@ -8,7 +8,7 @@ Density gating looks at two channels of flow cytometry data, and discards events In the figure below, a sample was acquired with an intentionally low side-scatter threshold to allow a significant number of events corresponding to non-biological debris. Density gating was then applied to retain 50% of the events in the densest region. Because cells have a more uniform size than the observed debris, density gating retains mostly cells, which is reflected in the fact that FL1 fluorescence is bimodal before gating, but not after. -.. image:: https://www.dropbox.com/s/rz2cvv0vug4ws7g/fundamentals_density_1.png?raw=1 +.. image:: /_static/img/fundamentals/fundamentals_density_1.png .. note:: The sample shown above was intentionally acquired with a low threshold value in ``SSC`` to show the capabilities of density gating. Normally, a lot of the debris can be eliminated by simply selecting a higher ``SSC`` threshold. However, density gating is still an excellent method to clean the data and eliminate all the debris that a simple threshold cannot filter. In our experience, this can still be a significant fraction of the total event count, especially if the cell culture has low density. diff --git a/doc/getting_started/install_anaconda.rst b/doc/getting_started/install_anaconda.rst index d84c3f3..b04cce7 100644 --- a/doc/getting_started/install_anaconda.rst +++ b/doc/getting_started/install_anaconda.rst @@ -3,11 +3,11 @@ Installing FlowCal with Anaconda To install Anaconda and ``FlowCal``, do the following: -1. Navigate to https://www.anaconda.com/distribution/#download-section. Make sure that your operating system is selected (Windows, macOS, Linux). Click on the "Download" button below the "Python 3.7 version" message. This will download the installer. +1. Navigate to `this `_ page and scroll down to the "Anaconda Installers" section. Click on the "Graphical Installer" link below the name of your operating system (Windows, MacOS, or Linux). This will download the installer. -.. note:: **Windows**: If your computer is a 32-bit PC, click on the message "32-Bit Graphical Installer" instead of the "Download" button. If you don't know whether yours is a 32 or 64 computer but you have purchased it in the last five years, it is probably a 64-bit computer and you can ignore this message. +.. note:: **Windows**: If your computer is a 32-bit PC, click on the message "32-Bit Graphical Installer" instead of the "Download" button. If you don't know whether yours is a 32 or 64-bit computer but you have purchased it in the last five years, it is probably a 64-bit computer and you can ignore this message. -.. note:: Python 2.7 is also supported. However, we recommend downloading the Python 3.7 version of Anaconda. +.. note:: Python 2.7 is also supported. However, we recommend downloading the Python 3.8 version of Anaconda. 2. Double click the installer (.exe in Windows, .pkg in OS X) and follow the instructions on screen. @@ -17,7 +17,7 @@ To install Anaconda and ``FlowCal``, do the following: 4. Inside the unzipped folder, double click on ``Install FlowCal (Windows).bat`` or ``Install FlowCal (macOS)`` if you are using Windows or OS X, respectively. This will open a terminal window and install ``FlowCal``. The installation procedure may take a few minutes. When installation is finished, the terminal will show the message “Press Enter to finish...”. If the installation was successful, your terminal should look like the figure below. Press Enter to close the terminal window. -.. image:: https://www.dropbox.com/s/9ygziuk8r2r93kw/installation_completed.png?raw=1 +.. image:: /_static/img/getting_started/installation_completed.png .. note:: **Windows**: If the following message appears after double clicking ``Install FlowCal (Windows)``: “Windows protected your PC – Windows SmartScreen prevented an unrecognized app from starting...”, click on the “More info” link under the text, and then click on the “Run anyway” button. This will remove the security restriction from the program and allow it to run properly. diff --git a/doc/getting_started/install_python.rst b/doc/getting_started/install_python.rst index 163ef28..3ecd41f 100644 --- a/doc/getting_started/install_python.rst +++ b/doc/getting_started/install_python.rst @@ -1,7 +1,7 @@ Installing FlowCal in an Existing Python Evironment ======================================================= -Python (2.7, 3.6, or 3.7) is required, along with ``pip`` and ``setuptools``. The easiest way is to install ``FlowCal`` is to use ``pip``:: +Python (2.7, 3.6, 3.7, or 3.8) is required, along with ``pip`` and ``setuptools``. The easiest way is to install ``FlowCal`` is to use ``pip``:: pip install FlowCal diff --git a/doc/python_tutorial/gate.rst b/doc/python_tutorial/gate.rst index f852c42..329f7d6 100644 --- a/doc/python_tutorial/gate.rst +++ b/doc/python_tutorial/gate.rst @@ -15,16 +15,16 @@ Also, import ``numpy`` and ``pyplot`` from ``matplotlib`` Removing Saturated Events ------------------------- -We'll start by loading the data from file ``Data001.fcs`` into an ``FCSData`` object called ``s``. Then, transform all channels into a.u. +We'll start by loading the data from file ``sample006.fcs`` into an ``FCSData`` object called ``s``. Then, transform all channels into a.u. ->>> s = FlowCal.io.FCSData('FCFiles/Data001.fcs') +>>> s = FlowCal.io.FCSData('FCFiles/sample006.fcs') >>> s = FlowCal.transform.to_rfi(s) In the :doc:`plotting tutorial ` we looked at a density plot of the forward scatter/side scatter (``FSC``/``SSC``) channels and identified several clusters of particles (events). This density plot is repeated below for convenience. -.. image:: https://www.dropbox.com/s/j2fe7f7drib5nvs/python_tutorial_plot_density_2d_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_density_2d_2.png -From these clusters, the faint one in the low-middle portion corresponds to non-cellular debris, and the one above corresponds to cells. One additional group on the left corresponds to saturated events, with the lowest possible forward scatter value: 1 a.u.. +From these subpopulations, the faint elongated one in the low-middle portion corresponds to non-cellular debris, and the large one in the middle corresponds to cells. One additional elongated subpopulation on the left corresponds to saturated events, with the lowest possible forward scatter value: 1 a.u.. Some flow cytometers will capture events outside of their range and assign them either the lowest or highest possible values of a channel, depending on which side of the range they fall on. We call these events "saturated". Including them in the analysis results, most of the time, in distorted distribution shapes and incorrect statistics. Therefore, it is generally advised to remove saturated events. To do so, ``FlowCal`` incorporates the function :func:`FlowCal.gate.high_low`. This function retains all the events in the specified channels between two specified values: a high and a low threshold. If these values are not specified, however, the function uses the saturating values. @@ -34,17 +34,17 @@ Some flow cytometers will capture events outside of their range and assign them ... mode='scatter') >>> plt.show() -.. image:: https://www.dropbox.com/s/55wge5a0jb40m5z/python_tutorial_gate_high_low_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_gate_high_low_1.png -We successfully removed the events on the left. We can go one step further and use :func:`FlowCal.gate.high_low` again to remove the cluster in the lower middle of the plot, which as we said before corresponds to debris. +We successfully removed the events on the left. We can go one step further and use :func:`FlowCal.gate.high_low` again to remove some of the events below the main event cluster, which as we said before corresponds to debris. ->>> s_g2 = FlowCal.gate.high_low(s_g1, channels='SSC', low=25) +>>> s_g2 = FlowCal.gate.high_low(s_g1, channels='SSC', low=280) >>> FlowCal.plot.density2d(s_g2, ... channels=['FSC', 'SSC'], ... mode='scatter') >>> plt.show() -.. image:: https://www.dropbox.com/s/jeqjk1i6rdyh4sj/python_tutorial_gate_high_low_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_gate_high_low_2.png This approach, however, requires one to estimate a low threshold value for every sample manually. In addition, we usually want events in the densest forward scatter/side scatter region, which requires a more complex shape than a pair of thresholds. We will now explore better ways to achieve this. @@ -56,7 +56,7 @@ Ellipse Gate >>> s_g3 = FlowCal.gate.ellipse(s_g1, ... channels=['FSC', 'SSC'], ... log=True, -... center=(2.4, 1.7), +... center=(2.3, 2.78), ... a=0.3, ... b=0.2, ... theta=30/180.*np.pi) @@ -65,7 +65,7 @@ Ellipse Gate ... mode='scatter') >>> plt.show() -.. image:: https://www.dropbox.com/s/zio5jhyrg95sqpn/python_tutorial_gate_ellipse_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_gate_ellipse_1.png As shown above, the remaining events reside only inside an ellipse-shaped region. Note that we used the argument ``log``, which indicates that the gated region should look like an ellipse in a logarithmic plot. This also requires that the center and the major and minor axes (``a`` and ``b``) be specified in log space. @@ -83,7 +83,7 @@ Density Gate ... mode='scatter') >>> plt.show() -.. image:: https://www.dropbox.com/s/k8dr87do3avt2df/python_tutorial_gate_density_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_gate_density_1.png We can see that :func:`FlowCal.gate.density2d` automatically identified the region that contains cells, and defined a shape that more closely resembles what the ungated density map looks like. The parameter ``gating_fraction`` allows the user to control the fraction of events to retain, and it is the only parameter that the user is required to specify. @@ -112,6 +112,6 @@ The function :func:`FlowCal.plot.density_and_hist` was introduced in the :doc:`p >>> plt.tight_layout() >>> plt.show() -.. image:: https://www.dropbox.com/s/oorefy9n526hgj4/python_tutorial_gate_density_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_gate_density_2.png We can now observe the gating contour right on top of the ungated data, and see which events were kept and which ones were left out. In addition, we can visualize how gating affected the other channels. diff --git a/doc/python_tutorial/mef.rst b/doc/python_tutorial/mef.rst index 4922a2f..49eef03 100644 --- a/doc/python_tutorial/mef.rst +++ b/doc/python_tutorial/mef.rst @@ -15,9 +15,9 @@ Also, import ``numpy`` and ``pyplot`` from ``matplotlib`` Working with Calibration Beads ------------------------------ -As mentioned in the :doc:`fundamentals` section, conversion to MEF requires measuring calibration beads. ``Beads006.fcs`` in the ``FCFiles`` folder contains beads data. Let's examine it. +As mentioned in the :doc:`fundamentals` section, conversion to MEF requires measuring calibration beads. ``sample001.fcs`` in the ``FCFiles`` folder contains beads data. Let's examine it. ->>> b = FlowCal.io.FCSData('FCFiles/Beads006.fcs') +>>> b = FlowCal.io.FCSData('FCFiles/sample001.fcs') >>> b = FlowCal.transform.to_rfi(b) >>> b_g, __, c = FlowCal.gate.density2d(b, ... channels=['FSC', 'SSC'], @@ -35,7 +35,7 @@ As mentioned in the :doc:`fundamentals` section, conv >>> plt.tight_layout() >>> plt.show() -.. image:: https://www.dropbox.com/s/fgzpnmk7njfirua/python_tutorial_mef_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_1.png The ``FSC``/``SSC`` density plot shows two groups of events: the dense group in the middle corresponds to single beads, whereas the fainter cluster on the upper right corresponds to bead agglomerations. Only single beads should be used, so :func:`FlowCal.gate.density2d` is used here to identify single beads automatically. Looking at the ``FL1`` histogram, we can clearly distinguish 8 subpopulations with different fluorescence levels. Note that the group with the highest fluorescence seems to be close to saturation. @@ -48,7 +48,7 @@ Generating a transformation function from calibration beads data is a complicate >>> # Obtain transformation function >>> # The following MEFL values were provided by the beads' manufacturer ->>> mefl_values = np.array([0, 646, 1704, 4827, 15991, 47609, 135896, 273006]) +>>> mefl_values = np.array([0, 792, 2079, 6588, 16471, 47497, 137049, 271647]) >>> to_mef = FlowCal.mef.get_transform_fxn(b_g, ... mef_values=mefl_values, ... mef_channels='FL1', @@ -60,7 +60,7 @@ The argument ``plot`` instructs :func:`FlowCal.mef.get_transform_fxn` to generat Let's now use ``to_mef`` to transform fluroescence data to MEF. >>> # Load sample ->>> s = FlowCal.io.FCSData('FCFiles/Data001.fcs') +>>> s = FlowCal.io.FCSData('FCFiles/sample006.fcs') >>> >>> # Transform all channels to a.u., and then FL1 to MEF. >>> s = FlowCal.transform.to_rfi(s) @@ -76,7 +76,7 @@ Let's now use ``to_mef`` to transform fluroescence data to MEF. >>> FlowCal.plot.hist1d(s_g, channel='FL1') >>> plt.show() -.. image:: https://www.dropbox.com/s/k7qyw9i5w4zs6gv/python_tutorial_mef_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_2.png ``s_g`` now contains ``FL1`` fluorescence values in MEF units. Note that the values in the x axis of the histogram do not match the ones showed before in channel (raw) units or a.u.. This is always true in general, because fluorescence is now expressed in different units. @@ -92,15 +92,15 @@ Generating a MEF transformation function involves four steps: ``FlowCal`` uses a clustering algorithm to automatically identify the different subpopulations of beads. The algorithm will try to find as many populations as values are provided in ``mef_values``. -A plot with a default filename of ``clustering_.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` after the completion of this step. This plot is a histogram or scatter plot in which different subpopulations are shown in a different colors. Such plot is shown below, for ``Beads006.fcs``. +A plot with a default filename of ``clustering_.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` after the completion of this step. This plot is a histogram or scatter plot in which different subpopulations are shown in a different colors. Such plot is shown below, for ``sample001.fcs``. -.. image:: https://www.dropbox.com/s/vkm8n3kt53mfqrp/python_tutorial_mef_3.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_3.png It is always visually clear which events correspond to which groups, and the different colors should correspond to this expectation. If they don't, sometimes it helps to use a different set of fluorescence channels for clustering (see below), or to use a different gating fraction in the previous density gating step. The default clustering algorithm is Gaussian Mixture Models, implemented in :func:`FlowCal.mef.clustering_gmm`. However, a function implementing another clustering algorithm can be provided to :func:`FlowCal.mef.get_transform_fxn` through the argument ``clustering_fxn``. In addition, the argument ``clustering_channels`` specifies which channels to use for clustering. This can be different than ``mef_channels``, the channels for which to generate a standard curve. A plot resulting from clustering with two fluroescence channels is shown below. -.. image:: https://www.dropbox.com/s/6v9wke9s8sf0og9/python_tutorial_mef_4.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_4.png 2. Calculation of Population Statistics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -112,9 +112,9 @@ For each channel in ``mef_channels``, a representative fluorescence value in a.u For each channel in ``mef_channels``, subpopulations close to saturation are discarded. -A plot with a default filename of ``populations__.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` for each channel in ``mef_channels`` after the completion of this step. This plot is a histogram showing each population, as identified in step one, with vertical lines showing their representative statistic as calculated from step 2, and with the discarded populations colored in grey. Such plot is shown below, for ``Beads006.fcs`` and channel ``FL1``. +A plot with a default filename of ``populations__.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` for each channel in ``mef_channels`` after the completion of this step. This plot is a histogram showing each population, as identified in step one, with vertical lines showing their representative statistic as calculated from step 2, and with the discarded populations colored in grey. Such plot is shown below, for ``sample001.fcs`` and channel ``FL1``. -.. image:: https://www.dropbox.com/s/kk23qfdt3d9soed/python_tutorial_mef_5.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_5.png By default, populations whose mean is closer than a few standard deviations from one of the edge values are discarded. This is encoded in the function :func:`FlowCal.mef.selection_std`. A different method can be used by providing a different function to :func:`FlowCal.mef.get_transform_fxn` through the argument ``selection_fxn``. This argument can even be None, in which case no populations are discarded. Finally, one can manually discard a population by using ``None`` as its MEF fluorescence value in ``mef_values``. Discarding populations specified in this way is performed in addition to ``selection_fxn``. @@ -123,9 +123,9 @@ By default, populations whose mean is closer than a few standard deviations from A bead fluorescence model is fitted to the fluorescence values of each subpopulation in a.u., as calculated in step 2, and in MEF units, as provided in ``mef_values``. A standard curve can then be calculated from the bead fluorescence model. -A plot with a default filename of ``std_crv__.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` for each channel in ``mef_channels`` after the completion of this step. This plot shows the fluorescence values of each population in a.u. and MEF, the fitted bead fluorescence model, and the resulting standard curve. Such plot is shown below, for ``Beads006.fcs`` and channel ``FL1``. +A plot with a default filename of ``std_crv__.png`` is generated by :func:`FlowCal.mef.get_transform_fxn` for each channel in ``mef_channels`` after the completion of this step. This plot shows the fluorescence values of each population in a.u. and MEF, the fitted bead fluorescence model, and the resulting standard curve. Such plot is shown below, for ``sample001.fcs`` and channel ``FL1``. -.. image:: https://www.dropbox.com/s/lzomvsw74ktem8r/python_tutorial_mef_6.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_mef_6.png It is worth noting that the bead fluorescence model and the standard curve are different, in that bead fluorescence is also affected by bead autofluorescence, its fluorescence when no fluorophore is present. To obtain the standard curve, autofluorescence is eliminated from the model. Such a model is fitted in :func:`FlowCal.mef.fit_beads_autofluorescence`, but a different model can be provided to :func:`FlowCal.mef.get_transform_fxn` using the argument ``fitting_fxn``. diff --git a/doc/python_tutorial/plot.rst b/doc/python_tutorial/plot.rst index 8d6670c..b964750 100644 --- a/doc/python_tutorial/plot.rst +++ b/doc/python_tutorial/plot.rst @@ -7,16 +7,17 @@ To start, navigate to the ``examples`` directory included with FlowCal, and open >>> import FlowCal -Also, import ``pyplot`` from ``matplotlib`` +Also, import ``numpy`` and ``pyplot`` from ``matplotlib`` +>>> import numpy as np >>> import matplotlib.pyplot as plt Histograms ---------- -Let's load the data from file ``Data001.fcs`` into an ``FCSData`` object called ``s``, and tranform all channels to arbitrary units. +Let's load the data from file ``sample006.fcs`` into an ``FCSData`` object called ``s``, and tranform all channels to arbitrary units. ->>> s = FlowCal.io.FCSData('FCFiles/Data001.fcs') +>>> s = FlowCal.io.FCSData('FCFiles/sample006.fcs') >>> s = FlowCal.transform.to_rfi(s) One is often interested in the fluorescence distribution across a population of cells. This is represented in a histogram. Since ``FCSData`` is a numpy array, one could use the standard ``hist`` function included in matplotlib. Alternatively, ``FlowCal`` includes its own histogram function specifically tailored to work with ``FCSData`` objects. For example, one can plot the contents of the ``FL1`` channel with a single call to :func:`FlowCal.plot.hist1d`. @@ -24,7 +25,7 @@ One is often interested in the fluorescence distribution across a population of >>> FlowCal.plot.hist1d(s, channel='FL1') >>> plt.show() -.. image:: https://www.dropbox.com/s/lqz6blikfcmx2ub/python_tutorial_plot_hist1d_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_hist1d_1.png :func:`FlowCal.plot.hist1d` behaves mostly like a regular matplotlib plotting function: it will plot in the current figure and axis. The axes labels are populated by default, but one can still use ``plt.xlabel`` and ``plt.ylabel`` to change them. @@ -33,18 +34,18 @@ By default, :func:`FlowCal.plot.hist1d` uses something called *logicle* scaling >>> FlowCal.plot.hist1d(s, channel='FL1', xscale='log', bins=1024) >>> plt.show() -.. image:: https://www.dropbox.com/s/nvlp43qz85th4al/python_tutorial_plot_hist1d_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_hist1d_2.png Finally, :func:`FlowCal.plot.hist1d` can plot several FCSData objects at the same time. Let's now load 3 FCSData objects, transform all channels to a.u., and plot the ``FL1`` channel of all three with transparency. ->>> filenames = ['FCFiles/Data{:03d}.fcs'.format(i + 2) for i in range(3)] +>>> filenames = ['FCFiles/sample{:03d}.fcs'.format(i + 9) for i in range(3)] >>> d = [FlowCal.io.FCSData(filename) for filename in filenames] >>> d = [FlowCal.transform.to_rfi(di) for di in d] >>> FlowCal.plot.hist1d(d, channel='FL1', alpha=0.7, bins=128) >>> plt.legend(filenames, loc='upper left') >>> plt.show() -.. image:: https://www.dropbox.com/s/3q6htlqrr28mw6h/python_tutorial_plot_hist1d_3.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_hist1d_3.png Density Plots ------------- @@ -56,7 +57,7 @@ Let's look at the ``FSC`` and ``SSC`` channels in our sample ``s``. >>> FlowCal.plot.density2d(s, channels=['FSC', 'SSC']) >>> plt.show() -.. image:: https://www.dropbox.com/s/l041acoupnlb5os/python_tutorial_plot_density_2d_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_density_2d_1.png The color indicates the number of events in the region, with red indicating a bigger number than yellow and blue, in that order, by default. Similarly to :func:`FlowCal.plot.hist1d`, :func:`FlowCal.plot.density2d` uses logicle scaling by default. In addition, :func:`FlowCal.plot.density2d` applies, by default, gaussian smoothing to the density plot. @@ -65,9 +66,9 @@ The color indicates the number of events in the region, with red indicating a bi >>> FlowCal.plot.density2d(s, channels=['FSC', 'SSC'], mode='scatter') >>> plt.show() -.. image:: https://www.dropbox.com/s/j2fe7f7drib5nvs/python_tutorial_plot_density_2d_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_density_2d_2.png -The last plot shows three distinct populations. The one in the middle corresponds to cells, whereas the ones at the left and below correspond to non-biological debris. We will see how to "gate", or select only one population, in the :doc:`gating tutorial `. +The last plot shows three distinct populations. The large one in the middle corresponds to cells, whereas the ones at the left and below correspond to non-biological debris. We will see how to "gate", or select only one population, in the :doc:`gating tutorial `. Combined Histogram and Density Plots ------------------------------------ @@ -83,10 +84,60 @@ In particular, :func:`FlowCal.plot.density_and_hist` uses :func:`FlowCal.plot.hi >>> plt.tight_layout() >>> plt.show() -.. image:: https://www.dropbox.com/s/apb0ep5xp1idnht/python_tutorial_plot_density_and_hist_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_plot_density_and_hist_1.png :func:`FlowCal.plot.density_and_hist` can also plot data before and after applying gates. We will see this in the :doc:`gating tutorial `. +Violin Plots +------------ + +Histograms, as shown above, can be used to plot and compare data from multiple samples. However, they can easily get too crowded. A more compact way is to use a violin plot, wherein vertical, normalized, symmetrical histograms ("violins") are shown centered on corresponding x-axis values. We can do this with the :func:`FlowCal.plot.violin` function. + +>>> filenames = ['FCFiles/sample{:03d}.fcs'.format(i+6) for i in range(10)] +>>> d = [FlowCal.io.FCSData(filename) for filename in filenames] +>>> d = [FlowCal.transform.to_rfi(di) for di in d] +>>> dapg = np.array([0, 2.33, 4.36, 8.16, 15.3, 28.6, 53.5, 100, 187, 350]) +>>> FlowCal.plot.violin(data=d, channel='FL1', positions=dapg, xlabel='DAPG (uM)', xscale='log', ylim=(1e0,2e3)) +>>> plt.show() + +.. image:: /_static/img/python_tutorial/python_tutorial_plot_violin_1.png + +Note that the x axis has been plotted on a logarithmic scale using the ``xscale`` argument. Because data at position x=0 is specified, :func:`FlowCal.plot.violin` places it separately on the left side of the plot. In contrast, the y-axis is plotted on a ``logicle`` scale by default. However, it can be switched to ``'log'`` or ``'linear'`` using the argument ``yscale``. Horizontal violin plots can also be generated by setting the ``vert`` argument to ``False``. For more options, consult the function documentation. + +"Dose response" or "transfer" functions are common in biology. These sometimes include minimum (negative) and maximum (positive) controls, and are often approximated by mathematical models. The :func:`FlowCal.plot.violin_dose_response` function can be used to plot a full dose response dataset, including min data, max data, and a mathematical model. Min and max data are illustrated to the left of the plot, and the mathematical model is correctly illustrated even when a position=0 violin is illustrated separately when ``xscale`` is ``'log'``. + +>>> # Function specifying mathematical model +>>> def dapg_sensor_model(dapg_concentration): +>>> mn = 20 +>>> mx = 250. +>>> K = 20. +>>> n = 3.57 +>>> if dapg_concentration <= 0: +>>> return mn +>>> else: +>>> return mn + ((mx-mn)/(1+((K/dapg_concentration)**n))) +>>> +>>> # Plot +>>> FlowCal.plot.violin_dose_response( +>>> data=d, +>>> channel='FL1', +>>> positions=dapg, +>>> min_data=d[0], +>>> max_data=d[-1], +>>> model_fxn=dapg_sensor_model, +>>> xscale='log', +>>> yscale='log', +>>> ylim=(1e0,2e3), +>>> draw_model_kwargs={'color':'gray', +>>> 'linewidth':3, +>>> 'zorder':-1, +>>> 'solid_capstyle':'butt'}) +>>> plt.xlabel('DAPG Concentration ($\mu M$)') +>>> plt.ylabel('FL1 Fluorescence (a.u.)') +>>> plt.show() + +.. image:: /_static/img/python_tutorial/python_tutorial_plot_violin_2.png + Other Plotting Functions ------------------------ These are not the only functions in :mod:`FlowCal.plot`. For more information, consult the API reference. diff --git a/doc/python_tutorial/read.rst b/doc/python_tutorial/read.rst index 67c357c..a485761 100644 --- a/doc/python_tutorial/read.rst +++ b/doc/python_tutorial/read.rst @@ -9,14 +9,14 @@ To start, navigate to the ``examples`` directory included with FlowCal, and open FCS files are standard files in which flow cytometry data is stored. Normally, one FCS file corresponds to one sample. -The object :class:`FlowCal.io.FCSData` allows a user to open an FCS file. The following instruction opens the file ``Data001.fcs`` from the ``FCFiles`` folder, loads the information into an ``FCSData`` object, and assigns it to a variable ``s``. +The object :class:`FlowCal.io.FCSData` allows a user to open an FCS file. The following instruction opens the file ``sample006.fcs`` from the ``FCFiles`` folder, loads the information into an ``FCSData`` object, and assigns it to a variable ``s``. ->>> s = FlowCal.io.FCSData('FCFiles/Data001.fcs') +>>> s = FlowCal.io.FCSData('FCFiles/sample006.fcs') An ``FCSData`` object is a 2D ``numpy`` array with a few additional features. The first dimension indexes the event number, and the second dimension indexes the flow cytometry channel (or "parameter", as called by the FCS standard). We can see the number of events and channels using the standard ``numpy``'s ``shape`` property: >>> print(s.shape) -(30000, 8) +(32224, 8) As with any ``numpy`` array, we can slice an ``FCSData`` object. For example, let's obtain the first 100 events. @@ -28,12 +28,12 @@ Note that the product of slicing an FCSData object is also an FCSData object. We >>> s_sub_ch = s[:, [3, 4, 5]] >>> print(s_sub_ch.shape) -(30000, 3) +(32224, 3) However, it is not immediately obvious what channels we are getting. Fortunately, the ``FCSData`` object contains some additional information about the acquisition settings. In particular, we can check the name of the channels with the ``channels`` property. >>> print(s.channels) -('TIME', 'FSC', 'SSC', 'FL1', 'FL2', 'FL3', 'FL1W', 'FL1A') +('TIME', 'FSC', 'SSC', 'FL1', 'FL2', 'FL3', 'FSCW', 'FSCA') >>> print(s_sub_ch.channels) ('FL1', 'FL2', 'FL3') diff --git a/doc/python_tutorial/transform.rst b/doc/python_tutorial/transform.rst index 268a5a4..c42661e 100644 --- a/doc/python_tutorial/transform.rst +++ b/doc/python_tutorial/transform.rst @@ -10,9 +10,9 @@ To start, navigate to the ``examples`` directory included with FlowCal, and open Transforming to Arbitrary Fluorescence Units (a.u.) --------------------------------------------------- -Start by loading file ``Data001.fcs`` into an ``FCSData`` object called ``s``. +Start by loading file ``sample006.fcs`` into an ``FCSData`` object called ``s``. ->>> s = FlowCal.io.FCSData('FCFiles/Data001.fcs') +>>> s = FlowCal.io.FCSData('FCFiles/sample006.fcs') Let's now visualize the contents of the ``FL1`` channel. We will explore ``FlowCal``'s plotting functions in the :doc:`plotting tutorial `, but for now let's just use ``matplotlib``'s ``hist`` function. @@ -20,9 +20,9 @@ Let's now visualize the contents of the ``FL1`` channel. We will explore ``FlowC >>> plt.hist(s[:, 'FL1'], bins=100) >>> plt.show() -.. image:: https://www.dropbox.com/s/c481hlzcq0k2wzp/python_tutorial_transform_1.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_transform_1.png -Note that the range of the x axis is from 0 to 1024. However, our acquisition software showed fluorescence values from 1 to 10000. Where does the difference come from? An FCS file normally stores raw numbers as they are are reported by the instrument sensors. These are referred to as "channel numbers". The FCS file also contains enough information to transform these numbers back to proper fluorescence units, called Relative Fluorescence Intensities (RFI), or more commonly, arbitrary fluorescence units (a.u.). Depending on the instrument used, this conversion sometimes involves a simple scaling factor, but other times requires a non-straigthforward exponential transformation. The latter is our case. +Note that the range of the x axis is from 0 to around 800. However, our acquisition software showed fluorescence values from 1 to 10000. Where does the difference come from? An FCS file normally stores raw numbers as they are are reported by the instrument sensors. These are referred to as "channel numbers". The FCS file also contains enough information to transform these numbers back to proper fluorescence units, called Relative Fluorescence Intensities (RFI), or more commonly, arbitrary fluorescence units (a.u.). Depending on the instrument used, this conversion sometimes involves a simple scaling factor, but other times requires a non-straigthforward exponential transformation. The latter is our case. Fortunately, ``FlowCal`` includes :func:`FlowCal.transform.to_rfi`, a function that reads all the necessary paremeters from the FCS file and figures out how to convert data back to a.u. @@ -36,7 +36,7 @@ Fortunately, ``FlowCal`` includes :func:`FlowCal.transform.to_rfi`, a function t >>> plt.xscale('log') >>> plt.show() -.. image:: https://www.dropbox.com/s/u1wzrlfjdo3af14/python_tutorial_transform_2.png?raw=1 +.. image:: /_static/img/python_tutorial/python_tutorial_transform_2.png We will explore a more convenient way to plot transformed data in the :doc:`plotting tutorial `. diff --git a/examples/FCFiles/Beads006.fcs b/examples/FCFiles/Beads006.fcs deleted file mode 100644 index be6fcc1..0000000 Binary files a/examples/FCFiles/Beads006.fcs and /dev/null differ diff --git a/examples/FCFiles/Data001.fcs b/examples/FCFiles/Data001.fcs deleted file mode 100644 index e443922..0000000 Binary files a/examples/FCFiles/Data001.fcs and /dev/null differ diff --git a/examples/FCFiles/Data002.fcs b/examples/FCFiles/Data002.fcs deleted file mode 100644 index 951ab25..0000000 Binary files a/examples/FCFiles/Data002.fcs and /dev/null differ diff --git a/examples/FCFiles/Data003.fcs b/examples/FCFiles/Data003.fcs deleted file mode 100644 index 5142fdf..0000000 Binary files a/examples/FCFiles/Data003.fcs and /dev/null differ diff --git a/examples/FCFiles/Data004.fcs b/examples/FCFiles/Data004.fcs deleted file mode 100644 index eb4a6d0..0000000 Binary files a/examples/FCFiles/Data004.fcs and /dev/null differ diff --git a/examples/FCFiles/Data005.fcs b/examples/FCFiles/Data005.fcs deleted file mode 100644 index 6a61258..0000000 Binary files a/examples/FCFiles/Data005.fcs and /dev/null differ diff --git a/examples/FCFiles/max/sample002.fcs b/examples/FCFiles/max/sample002.fcs new file mode 100644 index 0000000..04501b5 Binary files /dev/null and b/examples/FCFiles/max/sample002.fcs differ diff --git a/examples/FCFiles/max/sample008.fcs b/examples/FCFiles/max/sample008.fcs new file mode 100644 index 0000000..5673167 Binary files /dev/null and b/examples/FCFiles/max/sample008.fcs differ diff --git a/examples/FCFiles/min/sample001.fcs b/examples/FCFiles/min/sample001.fcs new file mode 100644 index 0000000..158bedc Binary files /dev/null and b/examples/FCFiles/min/sample001.fcs differ diff --git a/examples/FCFiles/min/sample004.fcs b/examples/FCFiles/min/sample004.fcs new file mode 100644 index 0000000..0055fa2 Binary files /dev/null and b/examples/FCFiles/min/sample004.fcs differ diff --git a/examples/FCFiles/sample001.fcs b/examples/FCFiles/sample001.fcs new file mode 100644 index 0000000..18b3942 Binary files /dev/null and b/examples/FCFiles/sample001.fcs differ diff --git a/examples/FCFiles/sample006.fcs b/examples/FCFiles/sample006.fcs new file mode 100644 index 0000000..83f3db2 Binary files /dev/null and b/examples/FCFiles/sample006.fcs differ diff --git a/examples/FCFiles/sample007.fcs b/examples/FCFiles/sample007.fcs new file mode 100644 index 0000000..85ca13e Binary files /dev/null and b/examples/FCFiles/sample007.fcs differ diff --git a/examples/FCFiles/sample008.fcs b/examples/FCFiles/sample008.fcs new file mode 100644 index 0000000..a73f322 Binary files /dev/null and b/examples/FCFiles/sample008.fcs differ diff --git a/examples/FCFiles/sample009.fcs b/examples/FCFiles/sample009.fcs new file mode 100644 index 0000000..19a49dd Binary files /dev/null and b/examples/FCFiles/sample009.fcs differ diff --git a/examples/FCFiles/sample010.fcs b/examples/FCFiles/sample010.fcs new file mode 100644 index 0000000..e7f74ca Binary files /dev/null and b/examples/FCFiles/sample010.fcs differ diff --git a/examples/FCFiles/sample011.fcs b/examples/FCFiles/sample011.fcs new file mode 100644 index 0000000..c68490c Binary files /dev/null and b/examples/FCFiles/sample011.fcs differ diff --git a/examples/FCFiles/sample012.fcs b/examples/FCFiles/sample012.fcs new file mode 100644 index 0000000..f7ebb82 Binary files /dev/null and b/examples/FCFiles/sample012.fcs differ diff --git a/examples/FCFiles/sample013.fcs b/examples/FCFiles/sample013.fcs new file mode 100644 index 0000000..d959490 Binary files /dev/null and b/examples/FCFiles/sample013.fcs differ diff --git a/examples/FCFiles/sample014.fcs b/examples/FCFiles/sample014.fcs new file mode 100644 index 0000000..c49c348 Binary files /dev/null and b/examples/FCFiles/sample014.fcs differ diff --git a/examples/FCFiles/sample015.fcs b/examples/FCFiles/sample015.fcs new file mode 100644 index 0000000..7578c33 Binary files /dev/null and b/examples/FCFiles/sample015.fcs differ diff --git a/examples/analyze_excel_ui.py b/examples/analyze_excel_ui.py index e7876ee..266c855 100644 --- a/examples/analyze_excel_ui.py +++ b/examples/analyze_excel_ui.py @@ -5,13 +5,12 @@ This script is divided in two parts. Part one uses FlowCal's Excel UI to process calibration beads data and cell samples according to an input Excel file. The exact operations performed are identical to when normally -using the Excel UI. However, instead of generating an output Excel file, a -list of objects representing gated and transformed flow cytometry samples -is obtained. +using the Excel UI. However, instead of generating an output Excel file, an +OrderedDict of objects representing gated and transformed flow cytometry +samples is obtained. Part two exemplifies how to use the processed cell sample data with -FlowCal's plotting and statistics modules, in order to produce interesting -plots. +FlowCal's plotting and statistics modules to produce interesting plots. For details about the experiment, samples, and instrument used, please consult readme.txt. @@ -19,6 +18,7 @@ """ import numpy as np import pandas as pd +import matplotlib as mpl import matplotlib.pyplot as plt import FlowCal @@ -28,7 +28,7 @@ # Part 1: Obtaining processed flow cytometry data from an input Excel file ### - # ``FlowCal.excel_ui.read_table() reads a table from an Excel file, and + # ``FlowCal.excel_ui.read_table() reads a table from an Excel file and # returns the contents as a pandas dataframe. # We need tables describing the instruments used, the calibration beads # files, and the cell samples. @@ -48,11 +48,11 @@ # density gating as specified in this table, and generates MEF # transformation functions that can later be applied to cell samples. # To do so, it requires a table describing the flow cytometer used - # (``instruments_table``). Here, we also use verbose mode, and indicate that + # (``instruments_table``). Here, we also use verbose mode and indicate that # plots describing individual steps should be generated in the folder # "plot_beads". The result is a dictionary of ``FCSData`` objects # representing gated and transformed calibration beads samples - # (``beads_samples``), and a dictionary containing MEF transformation + # (``beads_samples``) and a dictionary containing MEF transformation # functions (``mef_transform_fxns``). This will be used later to process # cell samples. beads_samples, mef_transform_fxns = FlowCal.excel_ui.process_beads_table( @@ -64,15 +64,15 @@ # Process cell samples # ``FlowCal.excel_ui.process_samples_table()`` reads a properly formatted - # table describing cell samples (``samples_table``), and performs density + # table describing cell samples (``samples_table``) and performs density # gating and unit transformations as specified in this table. To do so, it # requires a table describing the flow cytometer used # (``instruments_table``) and a corresponding dictionary with MEF # transformation functions (``mef_transform_fxns``). Here, we also use - # verbose mode, and indicate that plots of each sample should be generated - # in the folder "plot_samples". The result is a list of ``FCSData`` objects - # (``samples``) representing gated and transformed flow cytometry cell - # samples. + # verbose mode and indicate that plots of each sample should be generated + # in the folder "plot_samples". The result is an OrderedDict of + # ``FCSData`` objects keyed on the sample ID (``samples``) representing + # gated and transformed flow cytometry cell samples. samples = FlowCal.excel_ui.process_samples_table( samples_table=samples_table, instruments_table=instruments_table, @@ -85,50 +85,153 @@ # Part 2: Examples on how to use processed cell sample data ### - # We will read IPTG concentrations for each sample from the Excel file. - # ``samples_table`` contains all the data from sheet "Samples", including - # data not directly used by ``FlowCal.excel_ui.process_samples_table()``. - # ``samples_table`` is a pandas dataframe, with each column having the - # same name as the corresponding header in the Excel file. - # We multiply the IPTG concentration by 1000 to convert to micromolar. - iptg = samples_table['IPTG (mM)']*1000 - - # Histogram of all samples - # Here, we plot the fluorescence histograms of all five samples in the same - # figure, using ``FlowCal.plot.hist1d``. Note how this function can be used + # Each entry in the Excel table has a corresponding ID, which can be used + # to reference information associated with or the sample loaded from a + # row in the Excel file. Collect the IDs of the non-control samples (i.e., + # 'S0001', ..., 'S0010'). + sample_ids = ['S00{:02}'.format(n) for n in range(1,10+1)] + + # We will read DAPG concentrations from the Excel file. ``samples_table`` + # contains all the data from sheet "Samples", including data not directly + # used by ``FlowCal.excel_ui.process_samples_table()``. ``samples_table`` + # is a pandas dataframe, with each column having the same name as the + # corresponding header in the Excel file. + dapg = samples_table.loc[sample_ids,'DAPG (uM)'] + + # Plot 1: Histogram of all samples + # + # Here, we plot the fluorescence histograms of all ten samples in the same + # figure using ``FlowCal.plot.hist1d``. Note how this function can be used # in the context of accessory matplotlib functions to modify the axes - # limits and labels and add a legend, among others. + # limits and labels and to add a legend, among other things. + + # Color each histogram according to its corresponding DAPG concentration. + # Use a perceptually uniform colormap (cividis), and transition among + # colors using a logarithmic normalization, which comports with the + # logarithmically spaced DAPG concentrations. + cmap = mpl.cm.get_cmap('cividis') + norm = mpl.colors.LogNorm(vmin=1e0, vmax=500) + colors = [cmap(norm(dapg_i)) if dapg_i > 0 else cmap(0.0) + for dapg_i in dapg] + plt.figure(figsize=(6,3.5)) - FlowCal.plot.hist1d(list(samples.values()), + FlowCal.plot.hist1d([samples[s_id] for s_id in sample_ids], channel='FL1', histtype='step', - bins=128) - plt.ylim([0, 2000]) + bins=128, + edgecolor=colors) + plt.ylim((0,2500)) + plt.xlim((0,5e4)) plt.xlabel('FL1 (Molecules of Equivalent Fluorescein, MEFL)') - plt.legend(['{} $\mu M$ IPTG'.format(i) for i in iptg], + plt.legend(['{:.1f} $\mu M$ DAPG'.format(i) for i in dapg], loc='upper left', fontsize='small') plt.tight_layout() plt.savefig('histograms.png', dpi=200) plt.close() - # Here we illustrate how to obtain statistics from the fluorescence of each - # sample, and how to use them in a plot. - # The stats module contains functions to calculate different statistics - # such as mean, median, and standard deviation. Here, we calculate the - # geometric mean from channel FL1 of each sample, and plot them against the - # corresponding IPTG concentrations. - samples_fluorescence = [FlowCal.stats.gmean(s, channels='FL1') - for s in list(samples.values())] - plt.figure(figsize=(5.5, 3.5)) - plt.plot(iptg, + # Plot 2: Dose response curve + # + # Here, we illustrate how to obtain statistics from the fluorescence of + # each sample and how to use them in a plot. The stats module contains + # functions to calculate different statistics such as mean, median, and + # standard deviation. In this example, we calculate the mean from channel + # FL1 of each sample and plot them against the corresponding DAPG + # concentrations. + samples_fluorescence = [FlowCal.stats.mean(samples[s_id], channels='FL1') + for s_id in sample_ids] + min_fluorescence = FlowCal.stats.mean(samples['min'], channels='FL1') + max_fluorescence = FlowCal.stats.mean(samples['max'], channels='FL1') + + dapg_color = '#ffc400' # common color used for DAPG-related plots + + plt.figure(figsize=(3,3)) + plt.plot(dapg, samples_fluorescence, marker='o', - color=(0, 0.4, 0.7)) - plt.xlabel('IPTG Concentration ($\mu M$)') + color=dapg_color) + + # Illustrate min and max bounds + plt.axhline(min_fluorescence, + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Min', x=2e2, y=1.6e2, ha='left', va='bottom', color='gray') + plt.axhline(max_fluorescence, + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Max', x=-0.7, y=5.2e3, ha='left', va='top', color='gray') + + plt.yscale('log') + plt.ylim((5e1,1e4)) + plt.xscale('symlog') + plt.xlim((-1e0, 1e3)) + plt.xlabel('DAPG Concentration ($\mu M$)') plt.ylabel('FL1 Fluorescence (MEFL)') plt.tight_layout() plt.savefig('dose_response.png', dpi=200) plt.close() + # Plot 3: Dose response violin plot + # + # Here, we use a violin plot to show the fluorescence of (almost) all + # cells as a function of DAPG. (The `upper_trim_fraction` and + # `lower_trim_fraction` parameters eliminate the top and bottom 1% of + # cells from each violin for aesthetic reasons. The summary statistic, + # which is illustrated as a horizontal line atop each violin, is + # calculated before cells are removed, though.) We set `yscale` to 'log' + # because the cytometer used to collect this data produces positive + # integer data (as opposed to floating-point data, which can sometimes be + # negative), so the added complexity of a logicle y-scale (which is the + # default) is not necessary. + + # FlowCal violin plots can also illustrate a mathematical model alongside + # the violins. To take advantage of this feature, we first recapitulate a + # model of the fluorescent protein (sfGFP) fluorescence produced by this + # DAPG sensor as a function of DAPG (from this study: + # https://doi.org/10.15252/msb.20209618). sfGFP fluorescence is cellular + # fluorescence minus autofluorescence. + def dapg_sensor_output(dapg_concentration): + mn = 86. + mx = 3147. + K = 20. + n = 3.57 + if dapg_concentration <= 0: + return mn + else: + return mn + ((mx-mn)/(1+((K/dapg_concentration)**n))) + + # To model cellular fluorescence, which we are plotting with this violin + # plot, we must add autofluorescence back to the sfGFP signal. For this + # model, autofluorescence is the mean fluorescence of an E. coli strain + # lacking sfGFP, which our min control is. + autofluorescence = FlowCal.stats.mean(samples['min'], channels='FL1') + def dapg_sensor_cellular_fluorescence(dapg_concentration): + return dapg_sensor_output(dapg_concentration) + autofluorescence + + plt.figure(figsize=(4,3.5)) + FlowCal.plot.violin_dose_response( + data=[samples[s_id] for s_id in sample_ids], + channel='FL1', + positions=dapg, + min_data=samples['min'], + max_data=samples['max'], + model_fxn=dapg_sensor_cellular_fluorescence, + violin_kwargs={'facecolor':dapg_color, + 'edgecolor':'black'}, + violin_width_to_span_fraction=0.075, + xscale='log', + yscale='log', + ylim=(1e1, 3e4), + draw_model_kwargs={'color':'gray', + 'linewidth':3, + 'zorder':-1, + 'solid_capstyle':'butt'}) + plt.xlabel('DAPG Concentration ($\mu M$)') + plt.ylabel('FL1 Fluorescence (MEFL)') + plt.tight_layout() + plt.savefig('dose_response_violin.png', dpi=200) + plt.close() + print("\nDone.") diff --git a/examples/analyze_mef.py b/examples/analyze_mef.py index 21b5bc5..e4c080f 100644 --- a/examples/analyze_mef.py +++ b/examples/analyze_mef.py @@ -3,18 +3,17 @@ FlowCal Python API example, using calibration beads data. This script is divided in three parts. Part one loads and processes -calibration beads data from an FCS file in order to obtain a transformation -function that can later be used to convert cell fluorescence to units of +calibration beads data from FCS files in order to obtain transformation +functions that can later be used to convert cell fluorescence to units of Molecules of Equivalent Fluorophore (MEF). At several points in this process, plots are generated that give insight into the relevant steps. -Part two processes data from five cell samples, and uses the MEF -transformation function from part one to convert fluorescence of these +Part two processes data from twelve cell samples and uses the MEF +transformation functions from part one to convert fluorescence of these samples to MEF. Plots are also generated in this stage. Part three exemplifies how to use the processed cell sample data with -FlowCal's plotting and statistics modules, in order to produce interesting -plots. +FlowCal's plotting and statistics modules to produce interesting plots. For details about the experiment, samples, and instrument used, please consult readme.txt. @@ -23,6 +22,7 @@ import os import os.path import numpy as np +import matplotlib as mpl import matplotlib.pyplot as plt import FlowCal @@ -30,28 +30,43 @@ # Definition of constants ### -# Name of the FCS file containing calibration beads data -beads_filename = 'FCFiles/Beads006.fcs' +# Name of the FCS files containing calibration beads data. The min and max +# controls were measured on separate days with their own beads samples, and +# one control was measured with a different cytometer gain setting, but we can +# still compare the min and max data to our samples because we are calibrating +# all fluorescence measurements to MEF units. +beads_filename = 'FCFiles/sample001.fcs' +min_beads_filename = 'FCFiles/min/sample001.fcs' +max_beads_filename = 'FCFiles/max/sample002.fcs' + # Names of the FCS files containing data from cell samples -samples_filenames = ['FCFiles/Data001.fcs', - 'FCFiles/Data002.fcs', - 'FCFiles/Data003.fcs', - 'FCFiles/Data004.fcs', - 'FCFiles/Data005.fcs', - ] +samples_filenames = ['FCFiles/sample006.fcs', + 'FCFiles/sample007.fcs', + 'FCFiles/sample008.fcs', + 'FCFiles/sample009.fcs', + 'FCFiles/sample010.fcs', + 'FCFiles/sample011.fcs', + 'FCFiles/sample012.fcs', + 'FCFiles/sample013.fcs', + 'FCFiles/sample014.fcs', + 'FCFiles/sample015.fcs'] +min_sample_filename = 'FCFiles/min/sample004.fcs' +max_sample_filename = 'FCFiles/max/sample008.fcs' # Fluorescence values of each bead subpopulation, in MEF. # These values should be taken from the datasheet provided by the bead # manufacturer. We take Molecules of Equivalent Fluorescein (MEFL) to calibrate # the FL1 (GFP) channel. -mefl_values = np.array([0, 646, 1704, 4827, 15991, 47609, 135896, 273006]) +mefl_values = np.array([0, 792, 2079, 6588, 16471, 47497, 137049, 271647]) +min_mefl_values = np.array([0, 771, 2106, 6262, 15183, 45292, 136258, 291042]) +max_mefl_values = np.array([0, 792, 2079, 6588, 16471, 47497, 137049, 271647]) -# IPTG concentration of each cell sample, in micromolar. -iptg = np.array([0, 81, 161, 318, 1000]) +# DAPG concentration of each cell sample, in micromolar. +dapg = np.array([0, 2.33, 4.36, 8.16, 15.3, 28.6, 53.5, 100, 187, 350]) # Plots will be generated at various stages of analysis. The following are the # names of the folders in which we will store these plots. -beads_plot_dir = 'plot_beads' +beads_plot_dir = 'plot_beads' samples_plot_dir = 'plot_samples' if __name__ == "__main__": @@ -71,7 +86,9 @@ # ``FlowCal.io.FCSData(filename)`` returns an object that represents flow # cytometry data loaded from file ``filename``. print("Loading file \"{}\"...".format(beads_filename)) - beads_sample = FlowCal.io.FCSData(beads_filename) + beads_sample = FlowCal.io.FCSData(beads_filename) + min_beads_sample = FlowCal.io.FCSData(min_beads_filename) + max_beads_sample = FlowCal.io.FCSData(max_beads_filename) # Data loaded from an FCS file is in "Channel Units", the raw numbers # reported from the instrument's detectors. The FCS file also contains @@ -79,7 +96,9 @@ # values, commonly referred to as arbitrary fluorescence units (a.u.). # The function ``FlowCal.transform.to_rfi()`` performs this conversion. print("Performing data transformation...") - beads_sample = FlowCal.transform.to_rfi(beads_sample) + beads_sample = FlowCal.transform.to_rfi(beads_sample) + min_beads_sample = FlowCal.transform.to_rfi(min_beads_sample) + max_beads_sample = FlowCal.transform.to_rfi(max_beads_sample) # Gating @@ -93,6 +112,12 @@ beads_sample_gated = FlowCal.gate.start_end(beads_sample, num_start=250, num_end=100) + min_beads_sample_gated = FlowCal.gate.start_end(min_beads_sample, + num_start=250, + num_end=100) + max_beads_sample_gated = FlowCal.gate.start_end(max_beads_sample, + num_start=250, + num_end=100) # ``FlowCal.gate.high_low()`` removes events outside a range specified by # a ``low`` and a ``high`` value. If these are not specified (as shown @@ -112,12 +137,16 @@ # channels. beads_sample_gated = FlowCal.gate.high_low(beads_sample_gated, channels=['FSC','SSC']) + min_beads_sample_gated = FlowCal.gate.high_low(min_beads_sample_gated, + channels=['FSC','SSC']) + max_beads_sample_gated = FlowCal.gate.high_low(max_beads_sample_gated, + channels=['FSC','SSC']) # ``FlowCal.gate.density2d()`` preserves only the densest population as # seen in a 2D density diagram of two channels. This helps remove particle # aggregations and other sparse populations that are not of interest (i.e. # debris). - # We use the forward and side scatter channels, and preserve 30% of the + # We use the forward and side scatter channels and preserve 85% of the # events. Since bead populations form a very narrow cluster in these # channels, we use a smoothing factor (``sigma``) lower than the default # (10). Finally, setting ``full_output=True`` instructs the function to @@ -127,7 +156,19 @@ density_gate_output = FlowCal.gate.density2d( data=beads_sample_gated, channels=['FSC','SSC'], - gate_fraction=0.3, + gate_fraction=0.85, + sigma=5., + full_output=True) + min_beads_sample_gated, __, min_gate_contour = FlowCal.gate.density2d( + data=min_beads_sample_gated, + channels=['FSC','SSC'], + gate_fraction=0.85, + sigma=5., + full_output=True) + max_beads_sample_gated, __, max_gate_contour = FlowCal.gate.density2d( + data=max_beads_sample_gated, + channels=['FSC','SSC'], + gate_fraction=0.85, sigma=5., full_output=True) beads_sample_gated = density_gate_output.gated_data @@ -151,17 +192,22 @@ # Plot filename # The figure can be saved in any format supported by matplotlib (svg, jpg, # etc.) by just changing the extension. - plot_filename = '{}/density_hist_{}.png'.format(beads_plot_dir, 'beads') + plot_filename = '{}/density_hist_{}.png'.format(beads_plot_dir, + 'beads') + min_plot_filename = '{}/min_density_hist_{}.png'.format(beads_plot_dir, + 'beads') + max_plot_filename = '{}/max_density_hist_{}.png'.format(beads_plot_dir, + 'beads') # Plot and save # The function ``FlowCal.plot.density_and_hist()`` plots a combined figure - # with a 2D density plot at the top, and an arbitrary number of 1D + # with a 2D density plot at the top and an arbitrary number of 1D # histograms below. In this case, we will plot the forward/side scatter - # channels in the density plot, and the fluorescence channels FL1 and FL3 + # channels in the density plot and the fluorescence channels FL1 and FL3 # below as two separate histograms. # Note that we are providing data both before (``beads_sample``) and after # (``beads_sample_gated``) gating. Each 1D histogram will display the - # ungated dataset with transparency, and the gated dataset in front with a + # ungated dataset with transparency and the gated dataset in front with a # solid color. In addition, we are providing ``gate_contour`` from the # density gating step, which will be displayed in the density diagram. This # will result in a convenient representation of the data both before and @@ -171,9 +217,25 @@ beads_sample_gated, density_channels=['FSC', 'SSC'], hist_channels=['FL1', 'FL3'], - gate_contour=gate_contour, + gate_contour=gate_contour, density_params=density_params, savefig=plot_filename) + FlowCal.plot.density_and_hist( + min_beads_sample, + min_beads_sample_gated, + density_channels=['FSC', 'SSC'], + hist_channels=['FL1', 'FL3'], + gate_contour=min_gate_contour, + density_params=density_params, + savefig=min_plot_filename) + FlowCal.plot.density_and_hist( + max_beads_sample, + max_beads_sample_gated, + density_channels=['FSC', 'SSC'], + hist_channels=['FL1', 'FL3'], + gate_contour=max_gate_contour, + density_params=density_params, + savefig=max_plot_filename) # Use beads data to obtain a MEF transformation function print("\nCalculating standard curve for channel FL1...") @@ -187,7 +249,7 @@ # using information from both FL1 and FL3 channels. We also enable the # ``verbose`` mode, which prints information of each step. Finally, we # instruct the function to generate plots of each step in the folder - # specified in ``beads_plot_dir``, with the suffix "beads". + # specified in ``beads_plot_dir`` with the suffix "beads". mef_transform_fxn = FlowCal.mef.get_transform_fxn( beads_sample_gated, mef_channels='FL1', @@ -197,6 +259,24 @@ plot=True, plot_dir=beads_plot_dir, plot_filename='beads') + min_mef_transform_fxn = FlowCal.mef.get_transform_fxn( + min_beads_sample_gated, + mef_channels='FL1', + mef_values=min_mefl_values, + clustering_channels=['FL1', 'FL3'], + verbose=True, + plot=True, + plot_dir=beads_plot_dir, + plot_filename='min_beads') + max_mef_transform_fxn = FlowCal.mef.get_transform_fxn( + max_beads_sample_gated, + mef_channels='FL1', + mef_values=max_mefl_values, + clustering_channels=['FL1', 'FL3'], + verbose=True, + plot=True, + plot_dir=beads_plot_dir, + plot_filename='max_beads') ### # Part 2: Processing cell sample data @@ -233,17 +313,17 @@ num_end=100) # Remove saturated events - # We only do this for the forward/side scatter channels, and for + # We only do this for the forward/side scatter channels and for # fluorescence channel FL1. sample_gated = FlowCal.gate.high_low(sample_gated, channels=['FSC','SSC','FL1']) # Apply density gating on the forward/side scatter channels. Preserve - # 50% of the events. Return also a contour around the gated region. + # 85% of the events. Return also a contour around the gated region. density_gate_output = FlowCal.gate.density2d( data=sample_gated, channels=['FSC','SSC'], - gate_fraction=0.5, + gate_fraction=0.85, full_output=True) sample_gated = density_gate_output.gated_data gate_contour = density_gate_output.contour @@ -270,7 +350,7 @@ # Plot and save # In this case, we will plot the forward/side scatter channels in - # the density plot, and a histogram of the fluorescence channel FL1 + # the density plot and a histogram of the fluorescence channel FL1 # below. FlowCal.plot.density_and_hist( sample, @@ -285,46 +365,204 @@ # Save cell sample object samples.append(sample_gated) + # Now, process the min and max control samples + print("\nProcessing control samples...") + # Load, transform, and gate control samples + min_sample = FlowCal.io.FCSData(min_sample_filename) + max_sample = FlowCal.io.FCSData(max_sample_filename) + + min_sample = FlowCal.transform.to_rfi(min_sample) + max_sample = FlowCal.transform.to_rfi(max_sample) + + min_sample = min_mef_transform_fxn(min_sample, channels=['FL1']) + max_sample = max_mef_transform_fxn(max_sample, channels=['FL1']) + + min_sample_gated = FlowCal.gate.start_end(min_sample, + num_start=250, + num_end=100) + max_sample_gated = FlowCal.gate.start_end(max_sample, + num_start=250, + num_end=100) + + min_sample_gated = FlowCal.gate.high_low(min_sample_gated, + channels=['FSC','SSC','FL1']) + max_sample_gated = FlowCal.gate.high_low(max_sample_gated, + channels=['FSC','SSC','FL1']) + + min_sample_gated, __, min_gate_contour = FlowCal.gate.density2d( + data=min_sample_gated, + channels=['FSC','SSC'], + gate_fraction=0.85, + full_output=True) + max_sample_gated, __, max_gate_contour = FlowCal.gate.density2d( + data=max_sample_gated, + channels=['FSC','SSC'], + gate_fraction=0.85, + full_output=True) + + # Plot and save + min_plot_filename = '{}/density_hist_min.png'.format(samples_plot_dir) + max_plot_filename = '{}/density_hist_max.png'.format(samples_plot_dir) + + FlowCal.plot.density_and_hist( + min_sample, + min_sample_gated, + density_channels=['FSC','SSC'], + hist_channels=['FL1'], + gate_contour=min_gate_contour, + density_params=density_params, + hist_params=hist_params, + savefig=min_plot_filename) + FlowCal.plot.density_and_hist( + max_sample, + max_sample_gated, + density_channels=['FSC','SSC'], + hist_channels=['FL1'], + gate_contour=max_gate_contour, + density_params=density_params, + hist_params=hist_params, + savefig=max_plot_filename) + ### # Part 3: Examples on how to use processed cell sample data ### - # Histogram of all samples - # Here, we plot the fluorescence histograms of all five samples in the same + # Plot 1: Histogram of all samples + # + # Here, we plot the fluorescence histograms of all ten samples in the same # figure, using ``FlowCal.plot.hist1d``. Note how this function can be used # in the context of accessory matplotlib functions to modify the axes - # limits and labels and add a legend, among others. + # limits and labels and to add a legend, among other things. + + # Color each histogram according to its corresponding DAPG concentration. + # Use a perceptually uniform colormap (cividis), and transition among + # colors using a logarithmic normalization, which comports with the + # logarithmically spaced DAPG concentrations. + cmap = mpl.cm.get_cmap('cividis') + norm = mpl.colors.LogNorm(vmin=1e0, vmax=500) + colors = [cmap(norm(dapg_i)) if dapg_i > 0 else cmap(0.0) + for dapg_i in dapg] + plt.figure(figsize=(6,3.5)) FlowCal.plot.hist1d(samples, channel='FL1', histtype='step', - bins=128) - plt.ylim([0, 2000]) + bins=128, + edgecolor=colors) + plt.ylim((0,2500)) + plt.xlim((0,5e4)) plt.xlabel('FL1 (Molecules of Equivalent Fluorescein, MEFL)') - plt.legend(['{} $\mu M$ IPTG'.format(i) for i in iptg], + plt.legend(['{} $\mu M$ DAPG'.format(i) for i in dapg], loc='upper left', fontsize='small') plt.tight_layout() plt.savefig('histograms.png', dpi=200) plt.close() - # Here we illustrate how to obtain statistics from the fluorescence of each - # sample, and how to use them in a plot. - # The stats module contains functions to calculate different statistics - # such as mean, median, and standard deviation. Here, we calculate the - # geometric mean from channel FL1 of each sample, and plot them against the - # corresponding IPTG concentrations. - samples_fluorescence = [FlowCal.stats.gmean(s, channels='FL1') + # Plot 2: Dose response curve + # + # Here, we illustrate how to obtain statistics from the fluorescence of + # each sample and how to use them in a plot. The stats module contains + # functions to calculate different statistics such as mean, median, and + # standard deviation. In this example, we calculate the mean from channel + # FL1 of each sample and plot them against the corresponding DAPG + # concentrations. + samples_fluorescence = [FlowCal.stats.mean(s, channels='FL1') for s in samples] - plt.figure(figsize=(5.5, 3.5)) - plt.plot(iptg, + min_fluorescence = FlowCal.stats.mean(min_sample_gated, + channels='FL1') + max_fluorescence = FlowCal.stats.mean(max_sample_gated, + channels='FL1') + + dapg_color = '#ffc400' # common color used for DAPG-related plots + + plt.figure(figsize=(3,3)) + plt.plot(dapg, samples_fluorescence, marker='o', - color=(0, 0.4, 0.7)) - plt.xlabel('IPTG Concentration ($\mu M$)') + color=dapg_color) + + # Illustrate min and max bounds + plt.axhline(min_fluorescence, + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Min', x=2e2, y=1.6e2, ha='left', va='bottom', color='gray') + plt.axhline(max_fluorescence, + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Max', x=-0.7, y=5.2e3, ha='left', va='top', color='gray') + + plt.yscale('log') + plt.ylim((5e1,1e4)) + plt.xscale('symlog') + plt.xlim((-1e0, 1e3)) + plt.xlabel('DAPG Concentration ($\mu M$)') plt.ylabel('FL1 Fluorescence (MEFL)') plt.tight_layout() plt.savefig('dose_response.png', dpi=200) plt.close() + # Plot 3: Dose response violin plot + # + # Here, we use a violin plot to show the fluorescence of (almost) all + # cells as a function of DAPG. (The `upper_trim_fraction` and + # `lower_trim_fraction` parameters eliminate the top and bottom 1% of + # cells from each violin for aesthetic reasons. The summary statistic, + # which is illustrated as a horizontal line atop each violin, is + # calculated before cells are removed, though.) We set `yscale` to 'log' + # because the cytometer used to collect this data produces positive + # integer data (as opposed to floating-point data, which can sometimes be + # negative), so the added complexity of a logicle y-scale (which is the + # default) is not necessary. + + # FlowCal violin plots can also illustrate a mathematical model alongside + # the violins. To take advantage of this feature, we first recapitulate a + # model of the fluorescent protein (sfGFP) fluorescence produced by this + # DAPG sensor as a function of DAPG (from this study: + # https://doi.org/10.15252/msb.20209618). sfGFP fluorescence is cellular + # fluorescence minus autofluorescence. + def dapg_sensor_output(dapg_concentration): + mn = 86. + mx = 3147. + K = 20. + n = 3.57 + if dapg_concentration <= 0: + return mn + else: + return mn + ((mx-mn)/(1+((K/dapg_concentration)**n))) + + # To model cellular fluorescence, which we are plotting with this violin + # plot, we must add autofluorescence back to the sfGFP signal. For this + # model, autofluorescence is the mean fluorescence of an E. coli strain + # lacking sfGFP, which our min control is. + autofluorescence = FlowCal.stats.mean(min_sample_gated, channels='FL1') + def dapg_sensor_cellular_fluorescence(dapg_concentration): + return dapg_sensor_output(dapg_concentration) + autofluorescence + + plt.figure(figsize=(4,3.5)) + FlowCal.plot.violin_dose_response( + data=samples, + channel='FL1', + positions=dapg, + min_data=min_sample_gated, + max_data=max_sample_gated, + model_fxn=dapg_sensor_cellular_fluorescence, + violin_kwargs={'facecolor':dapg_color, + 'edgecolor':'black'}, + violin_width_to_span_fraction=0.075, + xscale='log', + yscale='log', + ylim=(1e1, 3e4), + draw_model_kwargs={'color':'gray', + 'linewidth':3, + 'zorder':-1, + 'solid_capstyle':'butt'}) + plt.xlabel('DAPG Concentration ($\mu M$)') + plt.ylabel('FL1 Fluorescence (MEFL)') + plt.tight_layout() + plt.savefig('dose_response_violin.png', dpi=200) + plt.close() + print("\nDone.") diff --git a/examples/analyze_no_mef.py b/examples/analyze_no_mef.py index cfb1739..7b2209e 100644 --- a/examples/analyze_no_mef.py +++ b/examples/analyze_no_mef.py @@ -2,12 +2,11 @@ """ FlowCal Python API example, without using calibration beads data. -This script is divided in two parts. Part one processes data from five cell -samples, and generates plots of each one. +This script is divided in two parts. Part one processes data from ten cell +samples and generates plots of each one. Part two exemplifies how to use the processed cell sample data with -FlowCal's plotting and statistics modules, in order to produce interesting -plots. +FlowCal's plotting and statistics modules to produce interesting plots. For details about the experiment, samples, and instrument used, please consult readme.txt. @@ -16,6 +15,7 @@ import os import os.path import numpy as np +import matplotlib as mpl import matplotlib.pyplot as plt import FlowCal @@ -24,15 +24,19 @@ ### # Names of the FCS files containing data from cell samples -samples_filenames = ['FCFiles/Data001.fcs', - 'FCFiles/Data002.fcs', - 'FCFiles/Data003.fcs', - 'FCFiles/Data004.fcs', - 'FCFiles/Data005.fcs', - ] +samples_filenames = ['FCFiles/sample006.fcs', + 'FCFiles/sample007.fcs', + 'FCFiles/sample008.fcs', + 'FCFiles/sample009.fcs', + 'FCFiles/sample010.fcs', + 'FCFiles/sample011.fcs', + 'FCFiles/sample012.fcs', + 'FCFiles/sample013.fcs', + 'FCFiles/sample014.fcs', + 'FCFiles/sample015.fcs'] -# IPTG concentration of each cell sample, in micromolar. -iptg = np.array([0, 81, 161, 318, 1000]) +# DAPG concentration of each cell sample, in micromolar. +dapg = np.array([0, 2.33, 4.36, 8.16, 15.3, 28.6, 53.5, 100, 187, 350]) # Plots will be generated after gating and transforming cell samples. These # will be stored in the following folder. @@ -74,7 +78,7 @@ # Gating # Gating is the process of removing measurements of irrelevant - # particles, while retaining only the population of interest. + # particles while retaining only the population of interest. print("Performing gating...") # ``FlowCal.gate.start_end()`` removes the first and last few events. @@ -107,7 +111,7 @@ # seen in a 2D density diagram of two channels. This helps remove # particle aggregations and other sparse populations that are not of # interest (i.e. debris). - # We use the forward and side scatter channels, and preserve 50% of the + # We use the forward and side scatter channels and preserve 85% of the # events. Finally, setting ``full_output=True`` instructs the function # to return additional outputs in the form of a named tuple. # ``gate_contour`` is a curve surrounding the gated region, which we @@ -115,7 +119,7 @@ density_gate_output = FlowCal.gate.density2d( data=sample_gated, channels=['FSC','SSC'], - gate_fraction=0.5, + gate_fraction=0.85, full_output=True) sample_gated = density_gate_output.gated_data gate_contour = density_gate_output.contour @@ -144,14 +148,14 @@ # Plot and save # The function ``FlowCal.plot.density_and_hist()`` plots a combined - # figure with a 2D density plot at the top, and an arbitrary number of + # figure with a 2D density plot at the top and an arbitrary number of # 1D histograms below. In this case, we will plot the forward/side - # scatter channels in the density plot, and a histogram of the + # scatter channels in the density plot and a histogram of the # fluorescence channel FL1 below. # Note that we are providing data both before (``sample``) and after # (``sample_gated``) gating. The 1D histogram will display the ungated - # dataset with transparency, and the gated dataset in front with a solid - # solid color. In addition, we are providing ``gate_contour`` from the + # dataset with transparency and the gated dataset in front with a solid + # color. In addition, we are providing ``gate_contour`` from the # density gating step, which will be displayed in the density diagram. # This will result in a convenient representation of the data both # before and after gating. @@ -172,42 +176,112 @@ # Part 3: Examples on how to use processed cell sample data ### - # Histogram of all samples - # Here, we plot the fluorescence histograms of all five samples in the same - # figure, using ``FlowCal.plot.hist1d``. Note how this function can be used + # Plot 1: Histogram of all samples + # + # Here, we plot the fluorescence histograms of all ten samples in the same + # figure using ``FlowCal.plot.hist1d``. Note how this function can be used # in the context of accessory matplotlib functions to modify the axes - # limits and labels and add a legend, among others. + # limits and labels and to add a legend, among other things. + + # Color each histogram according to its corresponding DAPG concentration. + # Use a perceptually uniform colormap (cividis), and transition among + # colors using a logarithmic normalization, which comports with the + # logarithmically spaced DAPG concentrations. + cmap = mpl.cm.get_cmap('cividis') + norm = mpl.colors.LogNorm(vmin=1e0, vmax=500) + colors = [cmap(norm(dapg_i)) if dapg_i > 0 else cmap(0.0) + for dapg_i in dapg] + plt.figure(figsize=(6,3.5)) FlowCal.plot.hist1d(samples, channel='FL1', histtype='step', - bins=128) - plt.ylim([0, 2000]) + bins=128, + edgecolor=colors) + plt.ylim((0,2500)) + plt.xlim((0,5e3)) plt.xlabel('FL1 Fluorescence (a.u.)') - plt.legend(['{} $\mu M$ IPTG'.format(i) for i in iptg], + plt.legend(['{} $\mu M$ DAPG'.format(i) for i in dapg], loc='upper left', fontsize='small') plt.tight_layout() plt.savefig('histograms.png', dpi=200) plt.close() - # Here we illustrate how to obtain statistics from the fluorescence of each - # sample, and how to use them in a plot. - # The stats module contains functions to calculate different statistics - # such as mean, median, and standard deviation. Here, we calculate the - # geometric mean from channel FL1 of each sample, and plot them against the - # corresponding IPTG concentrations. - samples_fluorescence = [FlowCal.stats.gmean(s, channels='FL1') + # Plot 2: Dose response curve + # + # Here, we illustrate how to obtain statistics from the fluorescence of + # each sample and how to use them in a plot. The stats module contains + # functions to calculate different statistics such as mean, median, and + # standard deviation. In this example, we calculate the mean from channel + # FL1 of each sample and plot them against the corresponding DAPG + # concentrations. + samples_fluorescence = [FlowCal.stats.mean(s, channels='FL1') for s in samples] - plt.figure(figsize=(5.5, 3.5)) - plt.plot(iptg, + + dapg_color = '#ffc400' # common color used for DAPG-related plots + + plt.figure(figsize=(3,3)) + plt.plot(dapg, samples_fluorescence, marker='o', - color=(0, 0.4, 0.7)) - plt.xlabel('IPTG Concentration ($\mu M$)') + color=dapg_color) + + # Illustrate min and max bounds. Because some of our control samples were + # measured at a different cytometer gain setting and we aren't using MEF + # calibration here, we will use the 0uM and 350uM DAPG concentration + # samples instead. + plt.axhline(samples_fluorescence[0], + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Min', x=2e2, y=2.0e1, ha='left', va='bottom', color='gray') + plt.axhline(samples_fluorescence[-1], + color='gray', + linestyle='--', + zorder=-1) + plt.text(s='Max', x=-0.7, y=2.1e2, ha='left', va='top', color='gray') + + plt.yscale('log') + plt.ylim((5e0,5e2)) + plt.xscale('symlog') + plt.xlim((-1e0, 1e3)) + plt.xlabel('DAPG Concentration ($\mu M$)') plt.ylabel('FL1 Fluorescence (a.u.)') plt.tight_layout() plt.savefig('dose_response.png', dpi=200) plt.close() + # Plot 3: Dose response violin plot + # + # Here, we use a violin plot to show the fluorescence of (almost) all + # cells as a function of DAPG. (The `upper_trim_fraction` and + # `lower_trim_fraction` parameters eliminate the top and bottom 1% of + # cells from each violin for aesthetic reasons. The summary statistic, + # which is illustrated as a horizontal line atop each violin, is + # calculated before cells are removed, though.) We again use the 0uM and + # 350uM DAPG concentration samples as the min and max data in lieu of + # controls. We also set `yscale` to 'log' because the cytometer used to + # collect this data produces positive integer data (as opposed to + # floating-point data, which can sometimes be negative), so the added + # complexity of a logicle y-scale (which is the default) is not necessary. + plt.figure(figsize=(4,3.5)) + FlowCal.plot.violin_dose_response( + data=samples, + channel='FL1', + positions=dapg, + min_data=samples[0], + max_data=samples[-1], + violin_kwargs={'facecolor':dapg_color, + 'edgecolor':'black'}, + violin_width_to_span_fraction=0.075, + xscale='log', + yscale='log', + ylim=(1e0,2e3)) + plt.xlabel('DAPG Concentration ($\mu M$)') + plt.ylabel('FL1 Fluorescence (a.u.)') + plt.tight_layout() + plt.savefig('dose_response_violin.png', dpi=200) + plt.close() + print("\nDone.") diff --git a/examples/experiment.xlsx b/examples/experiment.xlsx index 272854a..5efdf77 100644 Binary files a/examples/experiment.xlsx and b/examples/experiment.xlsx differ diff --git a/examples/readme.txt b/examples/readme.txt index 944154e..527e557 100644 --- a/examples/readme.txt +++ b/examples/readme.txt @@ -1,13 +1,19 @@ Introduction ============ -This folder contains flow cytometry data from a simple bacterial IPTG-induction experiment, and the necessary files that demonstrate how to analyze this data with FlowCal. +This folder contains flow cytometry data from a simple bacterial 2,4‐diacetylphloroglucinol (DAPG) induction experiment and the necessary files that demonstrate how to analyze this data with FlowCal. -Experimental details -==================== +Experiment description +====================== -This experiment used an E. coli strain with a high-copy plasmid containing sfGFP under control of the IPTG-inducible promoter Ptac. Five samples were started from this strain, with IPTG added to the following final concentrations: 0, 81, 161, 318, and 1000 micromolar. Samples were grown until late exponential, and fluorescence measurements were taken via flow cytometry. The five resulting flow cytometry standard (FCS) files are contained in the "FCFiles" folder, and are named "DataXXX.fcs". An additional sample containing calibration beads was also measured, and the corresponding FCS file is named "Beads006.fcs". +Escherichia coli expressing the fluorescent protein superfolder GFP (sfGFP) from the DAPG-inducible promoter Pphlf were induced to 0, 2.33, 4.36, 8.16, 15.3, 28.6, 53.5, 100, 187, and 350 micromolar DAPG and grown shaking at 37C for ~6 hours prior to fluorophore maturation (wherein cells were incubated with the bacterial translation inhibitor chloramphenicol for one hour to allow fluorescent proteins to mature) and measurement of cellular fluorescence by flow cytometry. The ten resulting flow cytometry standard (FCS) files are located in the "FCFiles/" folder and are named "sampleXXX.fcs". An additional sample containing calibration beads was also measured; its corresponding FCS file is named "sample001.fcs". + +A minimum control sample containing E. coli lacking sfGFP was measured along with its own beads sample on a separate day; the corresponding FCS files are located in the "FCFiles/min/" folder. + +A maximum control sample containing E. coli expressing sfGFP from Pphlf but lacking the DAPG-sensitive PhlF transcriptional repressor was measured along with its own beads sample on a third day; the corresponding FCS files are located in the "FCFiles/max/" folder. + +For more information, see this study: https://doi.org/10.15252/msb.20209618. Details on the flow cytometer used @@ -29,7 +35,7 @@ Examples included Excel UI -------- -This example shows how to process a set of cell sample and bead sample FCS files with the Excel UI, and produce a set of plots and an output Excel file with statistics. +This example shows how to process cell-sample and bead-sample FCS files using the Excel UI to produce plots and an Excel file with statistics. To run, start FlowCal by running the "Run FlowCal" program. An "open file" dialog will appear. Navigate to this folder and select "experiment.xlsx". @@ -37,7 +43,7 @@ To run, start FlowCal by running the "Run FlowCal" program. An "open file" dialo Python API, without using calibration beads data ------------------------------------------------ -This example shows how to process a set of cell sample FCS files entirely from Python with FlowCal. +This example shows how to process cell-sample FCS files entirely from Python with FlowCal. To run, use the script "analyze_no_mef.py" as a regular python program. @@ -45,7 +51,7 @@ To run, use the script "analyze_no_mef.py" as a regular python program. Python API, using calibration beads data ---------------------------------------- -This example shows how to process a set of cell sample and bead sample FCS files entirely from Python with FlowCal. +This example shows how to process cell-sample and bead-sample FCS files entirely from Python with FlowCal. To run, use the script "analyze_mef.py" as a regular python program. @@ -53,6 +59,6 @@ To run, use the script "analyze_mef.py" as a regular python program. Python API, using calibration beads data and an input Excel file ---------------------------------------------------------------- -This example shows how to process a set of cell sample and bead sample FCS files with the Excel UI, and obtain processed flow cytometry data for further analysis in Python. +This example shows how to process cell-sample and bead-sample FCS files using the Excel UI to obtain processed flow cytometry data for further analysis in Python. To run, use the script "analyze_excel_ui.py" as a regular python program.