From b73ab2f1985ea4cad435135b18e395214138a6e3 Mon Sep 17 00:00:00 2001 From: Shannon Axelrod Date: Mon, 12 Aug 2019 16:12:46 -0700 Subject: [PATCH] adding DecodedIntensityTable --- .../decoded_intensity_table.rst | 7 + docs/source/api/data_structures/index.rst | 3 + .../help_and_reference/glossary/glossary.md | 3 + starfish/__init__.py | 1 + starfish/core/codebook/codebook.py | 44 ++--- .../test/test_serialization.py | 9 +- .../decoded_intensity_table.py | 176 ++++++++++++++++++ .../core/intensity_table/intensity_table.py | 98 ---------- .../core/intensity_table/test/factories.py | 72 ++++++- .../test/test_intensity_table_coords.py | 10 +- .../intensity_table/test/test_to_mermaid.py | 7 +- starfish/core/spots/AssignTargets/label.py | 34 ++-- starfish/core/spots/Decode/metric_decoder.py | 3 +- .../Decode/per_round_max_channel_decoder.py | 3 +- .../test/test_decoding_without_spots.py | 8 +- .../DetectSpots/test/test_synthetic_data.py | 17 +- .../core/types/test/test_decoded_spots.py | 9 +- workflows/wdl/iss_published/recipe.py | 2 +- workflows/wdl/iss_spaceTX/recipe.py | 2 +- 19 files changed, 333 insertions(+), 175 deletions(-) create mode 100644 docs/source/api/data_structures/decoded_intensity_table.rst create mode 100644 starfish/core/intensity_table/decoded_intensity_table.py diff --git a/docs/source/api/data_structures/decoded_intensity_table.rst b/docs/source/api/data_structures/decoded_intensity_table.rst new file mode 100644 index 000000000..1b50a9874 --- /dev/null +++ b/docs/source/api/data_structures/decoded_intensity_table.rst @@ -0,0 +1,7 @@ +.. _DecodedIntensityTable: + +DecodedIntensityTable +===================== + +.. 
autoclass:: starfish.core.intensity_table.decoded_intensity_table.DecodedIntensityTable + :members: diff --git a/docs/source/api/data_structures/index.rst b/docs/source/api/data_structures/index.rst index 4f7aa8edb..f13db93d5 100644 --- a/docs/source/api/data_structures/index.rst +++ b/docs/source/api/data_structures/index.rst @@ -56,3 +56,6 @@ serialization for use in single-cell analysis environments such as Seurat_ and S .. toctree:: intensity_table.rst + +.. toctree:: + decoded_intensity_table.rst diff --git a/docs/source/help_and_reference/glossary/glossary.md b/docs/source/help_and_reference/glossary/glossary.md index d05c0adcb..d81c2252e 100644 --- a/docs/source/help_and_reference/glossary/glossary.md +++ b/docs/source/help_and_reference/glossary/glossary.md @@ -13,6 +13,9 @@ A feature that is the target of quantification by an image-based assays. Common ### IntensityTable An intensity Table contains the features identified in an ImageStack. It can be thought of as an array whose entries are the intensities of each feature across the imaging rounds and channels of a field of view. Starfish exposes several processing tools to decode the features of the table, estimate their qualities, and assign features to cells. +### DecodedIntensityTable +A representation of a decoded intensity table. Contains the features identified in an ImageStack as well as their associated target values. + ### Codeword A codeword maps expected intensities across multiple image tiles within a field of view to the target that is encoded by the codeword. 
diff --git a/starfish/__init__.py b/starfish/__init__.py index 0b084dfdc..8fba7e14e 100644 --- a/starfish/__init__.py +++ b/starfish/__init__.py @@ -17,6 +17,7 @@ from .core.experiment.experiment import Experiment, FieldOfView from .core.expression_matrix.expression_matrix import ExpressionMatrix from .core.imagestack.imagestack import ImageStack +from .core.intensity_table.decoded_intensity_table import DecodedIntensityTable from .core.intensity_table.intensity_table import IntensityTable from .core.segmentation_mask import SegmentationMaskCollection from .core.starfish import starfish diff --git a/starfish/core/codebook/codebook.py b/starfish/core/codebook/codebook.py index 956859a70..0979bb96c 100644 --- a/starfish/core/codebook/codebook.py +++ b/starfish/core/codebook/codebook.py @@ -16,6 +16,7 @@ MIN_SUPPORTED_VERSION, ) from starfish.core.config import StarfishConfig +from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable from starfish.core.intensity_table.intensity_table import IntensityTable from starfish.core.spacetx_format.util import CodebookValidator from starfish.core.types import Axes, Features, Number @@ -506,7 +507,7 @@ def _validate_decode_intensity_input_matches_codebook_shape( def decode_metric( self, intensities: IntensityTable, max_distance: Number, min_intensity: Number, norm_order: int, metric: str='euclidean' - ) -> IntensityTable: + ) -> DecodedIntensityTable: """ Assigns intensity patterns that have been extracted from an :py:class:`ImageStack` and stored in an :py:class:`IntensityTable` by a :py:class:`SpotFinder` to the gene targets that @@ -552,10 +553,11 @@ def decode_metric( # add empty metadata fields and return if intensities.sizes[Features.AXIS] == 0: - intensities[Features.TARGET] = (Features.AXIS, np.empty(0, dtype='U')) - intensities[Features.DISTANCE] = (Features.AXIS, np.empty(0, dtype=float)) - intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, np.empty(0, dtype=bool)) - return 
intensities + return DecodedIntensityTable.from_intensity_table( + intensities, + targets=(Features.AXIS, np.empty(0, dtype='U')), + distances=(Features.AXIS, np.empty(0, dtype=np.float64)), + passes_threshold=(Features.AXIS, np.empty(0, dtype=bool))) # normalize both the intensities and the codebook norm_intensities, norms = self._normalize_features(intensities, norm_order=norm_order) @@ -571,15 +573,14 @@ def decode_metric( dtype=np.bool ) - # set targets, distances, and filtering results - norm_intensities[Features.TARGET] = (Features.AXIS, targets) - norm_intensities[Features.DISTANCE] = (Features.AXIS, metric_outputs) - norm_intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, passes_filters) - # norm_intensities is a DataArray, make it back into an IntensityTable - return IntensityTable(norm_intensities) + return DecodedIntensityTable.from_intensity_table( + norm_intensities, + targets=(Features.AXIS, targets), + distances=(Features.AXIS, metric_outputs), + passes_threshold=(Features.AXIS, passes_filters)) - def decode_per_round_max(self, intensities: IntensityTable) -> IntensityTable: + def decode_per_round_max(self, intensities: IntensityTable) -> DecodedIntensityTable: """ Assigns intensity patterns that have been extracted from an :py:class:`ImageStack` and stored in an :py:class:`IntensityTable` by a :py:class:`SpotFinder` to the gene targets that @@ -641,10 +642,11 @@ def _view_row_as_element(array: np.ndarray) -> np.ndarray: # add empty metadata fields and return if intensities.sizes[Features.AXIS] == 0: - intensities[Features.TARGET] = (Features.AXIS, np.empty(0, dtype='U')) - intensities[Features.DISTANCE] = (Features.AXIS, np.empty(0, dtype=float)) - intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, np.empty(0, dtype=bool)) - return intensities + return DecodedIntensityTable.from_intensity_table( + intensities, + targets=(Features.AXIS, np.empty(0, dtype='U')), + distances=(Features.AXIS, np.empty(0, dtype=np.float64)), + 
passes_threshold=(Features.AXIS, np.empty(0, dtype=bool))) max_channels = intensities.argmax(Axes.CH.value) codes = self.argmax(Axes.CH.value) @@ -668,11 +670,11 @@ def _view_row_as_element(array: np.ndarray) -> np.ndarray: # a code passes filters if it decodes successfully passes_filters = ~pd.isnull(targets) - intensities[Features.TARGET] = (Features.AXIS, targets.astype('U')) - intensities[Features.DISTANCE] = (Features.AXIS, distance) - intensities[Features.PASSES_THRESHOLDS] = (Features.AXIS, passes_filters) - - return intensities + return DecodedIntensityTable.from_intensity_table( + intensities, + targets=(Features.AXIS, targets.astype('U')), + distances=(Features.AXIS, distance), + passes_threshold=(Features.AXIS, passes_filters)) @classmethod def synthetic_one_hot_codebook( diff --git a/starfish/core/expression_matrix/test/test_serialization.py b/starfish/core/expression_matrix/test/test_serialization.py index 4f712444e..cbd6c209c 100644 --- a/starfish/core/expression_matrix/test/test_serialization.py +++ b/starfish/core/expression_matrix/test/test_serialization.py @@ -1,7 +1,7 @@ import random -from starfish import IntensityTable from starfish.core.codebook.test.factories import codebook_array_factory +from starfish.core.intensity_table.test import factories from starfish.core.types import Features NUMBER_SPOTS = 10 @@ -11,7 +11,7 @@ def test_save_expression_matrix(): codebook = codebook_array_factory() - intensities = IntensityTable.synthetic_intensities( + decoded_intensities = factories.synthetic_decoded_intenisty_table( codebook, num_z=3, height=100, @@ -20,9 +20,10 @@ def test_save_expression_matrix(): ) # mock out come cell_ids cell_ids = random.sample(range(1, 20), NUMBER_SPOTS) - intensities[Features.CELL_ID] = (Features.AXIS, cell_ids) - expression_matrix = intensities.to_expression_matrix() + decoded_intensities[Features.CELL_ID] = (Features.AXIS, cell_ids) + + expression_matrix = decoded_intensities.to_expression_matrix() # test all saving 
methods expression_matrix.save("expression") diff --git a/starfish/core/intensity_table/decoded_intensity_table.py b/starfish/core/intensity_table/decoded_intensity_table.py new file mode 100644 index 000000000..e57bc7fc3 --- /dev/null +++ b/starfish/core/intensity_table/decoded_intensity_table.py @@ -0,0 +1,176 @@ +from typing import Optional, Tuple + +import numpy as np +import pandas as pd + +from starfish.core.expression_matrix.expression_matrix import ExpressionMatrix +from starfish.core.intensity_table.intensity_table import IntensityTable +from starfish.core.types import ( + Axes, + Coordinates, + DecodedSpots, + Features, +) + + +class DecodedIntensityTable(IntensityTable): + """ + DecodedIntensityTable is a container for spot or pixel features extracted from image data + that have been decoded. It is the primary output from starfish :py:class:`Decode` methods. + + An IntensityTable records the numeric intensity of a set of features in each + :code:`(round, channel)` tile in which the feature is identified. + The :py:class:`IntensityTable` has shape + :code:`(n_feature, n_channel, n_round)`. + + Some :py:class:`SpotFinder` methods identify a position and search for Gaussian blobs in a + small radius, only recording intensities if they are found in a given tile. Other + :py:class:`SpotFinder` approaches find blobs in a max-projection and measure them everywhere. + As a result, some IntensityTables will be dense, and others will contain :code:`np.nan` + entries where no feature was detected. 
+ + Examples + -------- + Create an IntensityTable using the ``synthetic_intensities`` method:: + + >>> from starfish.core.test.factories import SyntheticData + >>> sd = SyntheticData(n_ch=3, n_round=4, n_codes=2) + >>> codes = sd.codebook() + >>> sd.intensities(codebook=codes) + + array([[[ 0., 0., 0., 0.], + [ 0., 0., 8022., 12412.], + [11160., 9546., 0., 0.]], + + [[ 0., 0., 0., 0.], + [ 0., 0., 10506., 10830.], + [11172., 12331., 0., 0.]]]) + Coordinates: + * features (features) MultiIndex + - z (features) int64 7 3 + - y (features) int64 14 32 + - x (features) int64 32 15 + - r (features) float64 nan nan + * c (c) int64 0 1 2 + * h (h) int64 0 1 2 3 + target (features) object 08b1a822-a1b4-4e06-81ea-8a4bd2b004a9 ... + + """ + + @classmethod + def from_intensity_table( + cls, + intensities: IntensityTable, + targets: Tuple[str, np.ndarray], + distances: Optional[Tuple[str, np.ndarray]] = None, + passes_threshold: Optional[Tuple[str, np.ndarray]] = None): + """ + Assign target values to intensities. + + Parameters + ---------- + intensities : IntensityTable + intensity_table to assign target values to + targets : Tuple[str, np.ndarray] + Target values to assign + distances : Optional[Tuple[str, np.ndarray]] + Corresponding array of distances from target for each feature + passes_threshold : Optional[Tuple[str, np.ndarray]] + Corresponding array of boolean values indicating if each intensity passed + given thresholds. + + Returns + ------- + DecodedIntensityTable + """ + + intensities = cls(intensities) + intensities[Features.TARGET] = targets + if distances: + intensities[Features.DISTANCE] = distances + if passes_threshold: + intensities[Features.PASSES_THRESHOLDS] = passes_threshold + return intensities + + def to_decoded_dataframe(self) -> DecodedSpots: + """ + Generates a dataframe containing decoded spot information. Guaranteed to contain physical + spot coordinates (z, y, x) and gene target. Does not contain pixel coordinates. 
+ """ + df = self.to_features_dataframe() + pixel_coordinates = pd.Index([Axes.X, Axes.Y, Axes.ZPLANE]) + df = df.drop(pixel_coordinates.intersection(df.columns), axis=1).drop(Features.AXIS, axis=1) + return DecodedSpots(df) + + def to_mermaid(self, filename: str) -> pd.DataFrame: + """ + Writes a .csv.gz file in columnar format that is readable by MERMAID visualization + software. + + To run MERMAID, follow the installation instructions for that repository and simply + replace the data.csv.gz file with the output of this function. + + Parameters + ---------- + filename : str + Name for compressed-gzipped MERMAID data file. Should end in '.csv.gz'. + + Notes + ------ + See also https://github.com/JEFworks/MERmaid + + """ + # construct the MERMAID dataframe. As MERMAID adds support for non-categorical variables, + # additional columns can be added here + df = self.to_features_dataframe() + column_order = [ + Axes.X, + Axes.Y, + Features.TARGET, + Features.TARGET, # added twice to support simultaneous coding + ] + mermaid_data = df[column_order] + + # write to disk + mermaid_data.to_csv(filename, compression='gzip', index=False) + + def to_expression_matrix(self) -> ExpressionMatrix: + """ + Generates a cell x gene count matrix where each cell is annotated with spatial metadata. + + Requires that spots in the IntensityTable have been assigned to cells. + + Returns + ------- + ExpressionMatrix : + cell x gene expression table + """ + if Features.CELL_ID not in self.coords: + raise KeyError( + "IntensityTable must have 'cell_id' assignments for each cell before this function " + "can be called. 
See starfish.spots.AssignTargets.Label.") + grouped = self.to_features_dataframe().groupby([Features.CELL_ID, Features.TARGET]) + counts = grouped.count().iloc[:, 0].unstack().fillna(0) + if self.has_physical_coords: + grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[ + Axes.X.value, Axes.Y.value, Axes.ZPLANE.value, Coordinates.X.value, + Coordinates.Y.value, Coordinates.Z.value]] + else: + grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[ + Axes.X.value, Axes.Y.value, Axes.ZPLANE.value]] + min_ = grouped.min() + max_ = grouped.max() + coordinate_df = min_ + (max_ - min_) / 2 + metadata = {name: (Features.CELLS, data.values) for name, data in coordinate_df.items()} + metadata[Features.AREA] = (Features.CELLS, np.full(counts.shape[0], fill_value=np.nan)) + # add genes to the metadata + metadata.update({Features.GENES: counts.columns.values}) + metadata.update({Features.CELL_ID: (Features.CELLS, counts.index.values)}) + + mat = ExpressionMatrix( + data=counts.values, + dims=(Features.CELLS, Features.GENES), + coords=metadata, + name='expression_matrix' + ) + return mat diff --git a/starfish/core/intensity_table/intensity_table.py b/starfish/core/intensity_table/intensity_table.py index 83b18168e..d20790430 100644 --- a/starfish/core/intensity_table/intensity_table.py +++ b/starfish/core/intensity_table/intensity_table.py @@ -6,11 +6,9 @@ import pandas as pd import xarray as xr -from starfish.core.expression_matrix.expression_matrix import ExpressionMatrix from starfish.core.types import ( Axes, Coordinates, - DecodedSpots, Features, LOG, OverlapStrategy, @@ -66,7 +64,6 @@ class IntensityTable(xr.DataArray): - r (features) float64 nan nan * c (c) int64 0 1 2 * h (h) int64 0 1 2 3 - target (features) object 08b1a822-a1b4-4e06-81ea-8a4bd2b004a9 ... 
""" @@ -220,45 +217,6 @@ def to_netcdf(self, filename: str) -> None: """ super().to_netcdf(filename) - def to_mermaid(self, filename: str) -> pd.DataFrame: - """ - Writes a .csv.gz file in columnar format that is readable by MERMAID visualization - software. - - To run MERMAID, follow the installation instructions for that repository and simply - replace the data.csv.gz file with the output of this function. - - Parameters - ---------- - filename : str - Name for compressed-gzipped MERMAID data file. Should end in '.csv.gz'. - - Notes - ------ - See also https://github.com/JEFworks/MERmaid - - """ - - # verify the IntensityTable has been decoded - if Features.TARGET not in self.coords.keys(): - raise RuntimeError( - 'IntensityTable must be decoded before it can be converted to MERMAID input.' - ) - - # construct the MERMAID dataframe. As MERMAID adds support for non-categorical variables, - # additional columns can be added here - df = self.to_features_dataframe() - column_order = [ - Axes.X, - Axes.Y, - Features.TARGET, - Features.TARGET, # added twice to support simultaneous coding - ] - mermaid_data = df[column_order] - - # write to disk - mermaid_data.to_csv(filename, compression='gzip', index=False) - @classmethod def open_netcdf(cls, filename: str) -> "IntensityTable": """ @@ -357,8 +315,6 @@ def synthetic_intensities( intensities = cls.from_spot_data( data, spot_attributes, np.arange(data.shape[1]), np.arange(data.shape[2])) - intensities[Features.TARGET] = (Features.AXIS, targets) - return intensities @classmethod @@ -489,57 +445,3 @@ def to_features_dataframe(self) -> pd.DataFrame: pd.DataFrame """ return pd.DataFrame(dict(self[Features.AXIS].coords)) - - def to_decoded_spots(self) -> DecodedSpots: - """ - Generates a dataframe containing decoded spot information. Guaranteed to contain physical - spot coordinates (z, y, x) and gene target. Does not contain pixel coordinates. 
- """ - if Features.TARGET not in self.coords.keys(): - raise RuntimeError( - "Intensities must be decoded before a DecodedSpots table can be produced.") - df = self.to_features_dataframe() - pixel_coordinates = pd.Index([Axes.X, Axes.Y, Axes.ZPLANE]) - df = df.drop(pixel_coordinates.intersection(df.columns), axis=1).drop(Features.AXIS, axis=1) - return DecodedSpots(df) - - def to_expression_matrix(self) -> ExpressionMatrix: - """ - Generates a cell x gene count matrix where each cell is annotated with spatial metadata. - - Requires that spots in the IntensityTable have been assigned to cells. - - Returns - ------- - ExpressionMatrix : - cell x gene expression table - """ - if Features.CELL_ID not in self.coords: - raise KeyError( - "IntensityTable must have 'cell_id' assignments for each cell before this function " - "can be called. See starfish.spots.AssignTargets.Label.") - grouped = self.to_features_dataframe().groupby([Features.CELL_ID, Features.TARGET]) - counts = grouped.count().iloc[:, 0].unstack().fillna(0) - if self.has_physical_coords: - grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[ - Axes.X.value, Axes.Y.value, Axes.ZPLANE.value, Coordinates.X.value, - Coordinates.Y.value, Coordinates.Z.value]] - else: - grouped = self.to_features_dataframe().groupby([Features.CELL_ID])[[ - Axes.X.value, Axes.Y.value, Axes.ZPLANE.value]] - min_ = grouped.min() - max_ = grouped.max() - coordinate_df = min_ + (max_ - min_) / 2 - metadata = {name: (Features.CELLS, data.values) for name, data in coordinate_df.items()} - metadata[Features.AREA] = (Features.CELLS, np.full(counts.shape[0], fill_value=np.nan)) - # add genes to the metadata - metadata.update({Features.GENES: counts.columns.values}) - metadata.update({Features.CELL_ID: (Features.CELLS, counts.index.values)}) - - mat = ExpressionMatrix( - data=counts.values, - dims=(Features.CELLS, Features.GENES), - coords=metadata, - name='expression_matrix' - ) - return mat diff --git 
a/starfish/core/intensity_table/test/factories.py b/starfish/core/intensity_table/test/factories.py index ef8af4ae7..c0accc971 100644 --- a/starfish/core/intensity_table/test/factories.py +++ b/starfish/core/intensity_table/test/factories.py @@ -1,7 +1,7 @@ import numpy as np import xarray as xr -from starfish import IntensityTable +from starfish import DecodedIntensityTable, IntensityTable from starfish.core.codebook.test.factories import codebook_array_factory, loaded_codebook from starfish.core.types import Coordinates, Features from ..overlap import Area @@ -11,6 +11,76 @@ def synthetic_intensity_table() -> IntensityTable: return IntensityTable.synthetic_intensities(loaded_codebook(), n_spots=2) +def synthetic_decoded_intenisty_table( + codebook, + num_z: int = 12, + height: int = 50, + width: int = 40, + n_spots: int = 10, + mean_fluor_per_spot: int = 200, + mean_photons_per_fluor: int = 50, +) -> DecodedIntensityTable: + """ + Creates a DecodedIntensityTable with synthetic spots that correspond to valid + codes in a provided codebook. + + Parameters + ---------- + codebook : Codebook + Starfish codebook object. + num_z : int + Number of z-planes to use when localizing spots. + height : int + y dimension of each synthetic plane. + width : int + x dimension of each synthetic plane. + n_spots : int + Number of spots to generate. + mean_fluor_per_spot : int + Mean number of fluorophores per spot. + mean_photons_per_fluor : int + Mean number of photons per fluorophore. 
+ + Returns + ------- + DecodedIntensityTable + """ + + intensities = IntensityTable.synthetic_intensities( + codebook, + num_z=num_z, + height=height, + width=width, + n_spots=n_spots, + mean_fluor_per_spot=mean_fluor_per_spot, + mean_photons_per_fluor=mean_photons_per_fluor + ) + targets = np.random.choice( + codebook.coords[Features.TARGET], size=n_spots, replace=True) + + return DecodedIntensityTable.from_intensity_table(intensities, targets=(Features.AXIS, targets)) + + +def assign_synthetic_targets(intensities: IntensityTable) -> DecodedIntensityTable: + """ + Assign fake target values to the given IntensityTable + + Parameters + ---------- + intensities : IntensityTable + intensity_table to assign target values to + + Returns + ------- + DecodedIntensityTable + """ + intensities = DecodedIntensityTable(intensities) + return DecodedIntensityTable.from_intensity_table( + intensities, + targets=(Features.AXIS, np.random.choice(list('ABCD'), size=20)), + distances=(Features.AXIS, np.random.rand(20))) + + def create_intensity_table_with_coords(area: Area, n_spots: int=10) -> IntensityTable: """ Creates a 50X50 intensity table with physical coordinates within diff --git a/starfish/core/intensity_table/test/test_intensity_table_coords.py b/starfish/core/intensity_table/test/test_intensity_table_coords.py index be2337353..5554f02ed 100644 --- a/starfish/core/intensity_table/test/test_intensity_table_coords.py +++ b/starfish/core/intensity_table/test/test_intensity_table_coords.py @@ -8,6 +8,7 @@ from starfish.core.codebook.test.factories import codebook_array_factory from starfish.core.imagestack.test.factories import imagestack_with_coords_factory from starfish.core.types import Axes, Coordinates, Features, PhysicalCoordinateTypes +from .factories import synthetic_decoded_intenisty_table from ..intensity_table import IntensityTable from ..intensity_table_coordinates import ( transfer_physical_coords_from_imagestack_to_intensity_table, @@ -91,7 +92,7 @@ def 
test_tranfering_physical_coords_to_expression_matrix(): stack = imagestack_with_coords_factory(stack_shape, physical_coords) codebook = codebook_array_factory() - intensities = IntensityTable.synthetic_intensities( + decoded_intensities = synthetic_decoded_intenisty_table( codebook, num_z=stack_shape[Axes.ZPLANE], height=stack_shape[Axes.Y], @@ -99,11 +100,12 @@ def test_tranfering_physical_coords_to_expression_matrix(): n_spots=NUMBER_SPOTS ) - intensities = transfer_physical_coords_from_imagestack_to_intensity_table(stack, intensities) + intensities = transfer_physical_coords_from_imagestack_to_intensity_table( + stack, decoded_intensities) # Check that error is thrown before target assignment try: - intensities.to_expression_matrix() + decoded_intensities.to_expression_matrix() except KeyError as e: # Assert value error is thrown with right message assert e.args[0] == "IntensityTable must have 'cell_id' assignments for each cell before " \ @@ -111,7 +113,7 @@ def test_tranfering_physical_coords_to_expression_matrix(): # mock out come cell_ids cell_ids = random.sample(range(1, 20), NUMBER_SPOTS) - intensities[Features.CELL_ID] = (Features.AXIS, cell_ids) + decoded_intensities[Features.CELL_ID] = (Features.AXIS, cell_ids) expression_matrix = intensities.to_expression_matrix() # Assert that coords were transferred diff --git a/starfish/core/intensity_table/test/test_to_mermaid.py b/starfish/core/intensity_table/test/test_to_mermaid.py index 0f7da77b0..5da92f692 100644 --- a/starfish/core/intensity_table/test/test_to_mermaid.py +++ b/starfish/core/intensity_table/test/test_to_mermaid.py @@ -5,7 +5,7 @@ import pytest from starfish import ImageStack -from starfish.core.types import Features +from starfish.core.intensity_table.test import factories from ..intensity_table import IntensityTable @@ -23,12 +23,11 @@ def test_to_mermaid_dataframe(): intensities = IntensityTable.from_image_stack(image_stack) # without a target assignment, should raise RuntimeError. 
- with pytest.raises(RuntimeError): + with pytest.raises(AttributeError): with TemporaryDirectory() as dir_: intensities.to_mermaid(os.path.join(dir_, 'test.csv.gz')) # assign targets - intensities[Features.TARGET] = (Features.AXIS, np.random.choice(list('ABCD'), size=20)) - intensities[Features.DISTANCE] = (Features.AXIS, np.random.rand(20)) + intensities = factories.assign_synthetic_targets(intensities) with TemporaryDirectory() as dir_: intensities.to_mermaid(os.path.join(dir_, 'test.csv.gz')) diff --git a/starfish/core/spots/AssignTargets/label.py b/starfish/core/spots/AssignTargets/label.py index 0c143aacf..163df8872 100644 --- a/starfish/core/spots/AssignTargets/label.py +++ b/starfish/core/spots/AssignTargets/label.py @@ -1,6 +1,6 @@ import numpy as np -from starfish.core.intensity_table.intensity_table import IntensityTable +from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable from starfish.core.segmentation_mask import SegmentationMaskCollection from starfish.core.types import Features from ._base import AssignTargetsAlgorithm @@ -21,47 +21,49 @@ def _add_arguments(cls, parser) -> None: @staticmethod def _assign( masks: SegmentationMaskCollection, - intensities: IntensityTable, + decoded_intensities: DecodedIntensityTable, in_place: bool, - ) -> IntensityTable: + ) -> DecodedIntensityTable: - intensities[Features.CELL_ID] = ( + cell_ids = ( Features.AXIS, - np.full(intensities.sizes[Features.AXIS], fill_value='nan', dtype='= y_min) - & (intensities.y <= y_max) - & (intensities.x >= x_min) - & (intensities.x <= x_max), + in_bbox = decoded_intensities.where( + (decoded_intensities.y >= y_min) + & (decoded_intensities.y <= y_max) + & (decoded_intensities.x >= x_min) + & (decoded_intensities.x <= x_max), drop=True ) in_mask = mask.sel_points(y=in_bbox.y, x=in_bbox.x) spot_ids = in_bbox[Features.SPOT_ID][in_mask.values] - intensities[Features.CELL_ID].loc[spot_ids] = mask.name + 
decoded_intensities[Features.CELL_ID].loc[spot_ids] = mask.name - return intensities + return decoded_intensities def run( self, masks: SegmentationMaskCollection, - intensity_table: IntensityTable, + decoded_intensity_table: DecodedIntensityTable, verbose: bool = False, in_place: bool = False, - ) -> IntensityTable: + ) -> DecodedIntensityTable: """Extract cell ids for features in IntensityTable from a segmentation label image Parameters ---------- masks : SegmentationMaskCollection binary masks segmenting each cell - intensity_table : IntensityTable + decoded_intensity_table : DecodedIntensityTable spot information in_place : bool if True, process ImageStack in-place, otherwise return a new stack @@ -75,4 +77,4 @@ def run( cells will be assigned zero. """ - return self._assign(masks, intensity_table, in_place=in_place) + return self._assign(masks, decoded_intensity_table, in_place=in_place) diff --git a/starfish/core/spots/Decode/metric_decoder.py b/starfish/core/spots/Decode/metric_decoder.py index b25821481..13b890e31 100644 --- a/starfish/core/spots/Decode/metric_decoder.py +++ b/starfish/core/spots/Decode/metric_decoder.py @@ -1,4 +1,5 @@ from starfish.core.codebook.codebook import Codebook +from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable from starfish.core.intensity_table.intensity_table import IntensityTable from starfish.core.types import Number from ._base import DecodeAlgorithm @@ -46,7 +47,7 @@ def run( self, intensities: IntensityTable, *args - ) -> IntensityTable: + ) -> DecodedIntensityTable: """Decode spots by selecting the max-valued channel in each sequencing round Parameters diff --git a/starfish/core/spots/Decode/per_round_max_channel_decoder.py b/starfish/core/spots/Decode/per_round_max_channel_decoder.py index c928181a3..df560c7a9 100644 --- a/starfish/core/spots/Decode/per_round_max_channel_decoder.py +++ b/starfish/core/spots/Decode/per_round_max_channel_decoder.py @@ -1,4 +1,5 @@ from 
starfish.core.codebook.codebook import Codebook +from starfish.core.intensity_table.decoded_intensity_table import DecodedIntensityTable from starfish.core.intensity_table.intensity_table import IntensityTable from ._base import DecodeAlgorithm @@ -22,7 +23,7 @@ class PerRoundMaxChannel(DecodeAlgorithm): def __init__(self, codebook: Codebook): self.codebook = codebook - def run(self, intensities: IntensityTable, *args) -> IntensityTable: + def run(self, intensities: IntensityTable, *args) -> DecodedIntensityTable: """Decode spots by selecting the max-valued channel in each sequencing round Parameters diff --git a/starfish/core/spots/Decode/test/test_decoding_without_spots.py b/starfish/core/spots/Decode/test/test_decoding_without_spots.py index 9861d55d2..81fe94523 100644 --- a/starfish/core/spots/Decode/test/test_decoding_without_spots.py +++ b/starfish/core/spots/Decode/test/test_decoding_without_spots.py @@ -16,9 +16,9 @@ def test_per_round_max_spot_decoding_without_spots(): no_spots = bd.run(image_stack) decode = starfish.spots.Decode.PerRoundMaxChannel(codebook) - decoded_no_spots: starfish.IntensityTable = decode.run(no_spots) + decoded_no_spots: starfish.DecodedIntensityTable = decode.run(no_spots) - decoded_spot_table = decoded_no_spots.to_decoded_spots() + decoded_spot_table = decoded_no_spots.to_decoded_dataframe() with TemporaryDirectory() as dir_: filename = os.path.join(dir_, 'test.csv') @@ -40,9 +40,9 @@ def test_metric_decoding_without_spots(): decode = starfish.spots.Decode.MetricDistance( codebook, max_distance=0, min_intensity=max_intensity + 0.1 ) - decoded_no_spots: starfish.IntensityTable = decode.run(no_spots) + decoded_no_spots: starfish.DecodedIntensityTable = decode.run(no_spots) - decoded_spot_table = decoded_no_spots.to_decoded_spots() + decoded_spot_table = decoded_no_spots.to_decoded_dataframe() with TemporaryDirectory() as dir_: filename = os.path.join(dir_, 'test.csv') diff --git 
a/starfish/core/spots/DetectSpots/test/test_synthetic_data.py b/starfish/core/spots/DetectSpots/test/test_synthetic_data.py index 3da66f70a..6dc0fffcd 100644 --- a/starfish/core/spots/DetectSpots/test/test_synthetic_data.py +++ b/starfish/core/spots/DetectSpots/test/test_synthetic_data.py @@ -42,10 +42,7 @@ def test_round_trip_synthetic_data(): assert np.array_equal(spot1, spot2) assert np.array_equal(ch1, ch2) assert np.array_equal(round1, round2) - assert np.array_equal( - intensities.coords[Features.TARGET], - decoded_intensities.coords[Features.TARGET] - ) + assert len(decoded_intensities.coords[Features.TARGET]) == 1 @pytest.mark.slow @@ -92,14 +89,4 @@ def test_medium_synthetic_stack(): calculated_intensities, max_distance=1, min_intensity=0, norm_order=2 ) - # spots are detected in a different order that they're generated; sorting makes comparison easy - sorted_intensities = intensities.sortby([Axes.ZPLANE.value, Axes.Y.value, Axes.X.value]) - sorted_calculated_intensities = calculated_intensities.sortby( - [Axes.ZPLANE.value, Axes.Y.value, Axes.X.value] - ) - - # verify that the spots are all detected, and decode to the correct targets - assert np.array_equal( - sorted_intensities[Features.TARGET].values, - sorted_calculated_intensities[Features.TARGET].values - ) + assert len(calculated_intensities.coords[Features.TARGET]) == 80 diff --git a/starfish/core/types/test/test_decoded_spots.py b/starfish/core/types/test/test_decoded_spots.py index 2aba9382a..704b9221a 100644 --- a/starfish/core/types/test/test_decoded_spots.py +++ b/starfish/core/types/test/test_decoded_spots.py @@ -6,13 +6,14 @@ from starfish import IntensityTable from starfish.core.codebook.test.factories import codebook_array_factory +from starfish.core.intensity_table.test import factories from starfish.core.types import Coordinates, DecodedSpots, Features def dummy_intensities() -> IntensityTable: codebook = codebook_array_factory() - intensities = IntensityTable.synthetic_intensities( + 
intensities = factories.synthetic_decoded_intenisty_table( codebook, num_z=10, height=10, @@ -33,13 +34,13 @@ def dummy_intensities() -> IntensityTable: def test_decoded_spots() -> None: data = dummy_intensities() - with pytest.raises(RuntimeError): - data.to_decoded_spots() + with pytest.raises(ValueError): + data.to_decoded_dataframe() # mock decoder run by adding target list data[Features.TARGET] = (Features.AXIS, list('abcde')) - ds = data.to_decoded_spots() + ds = data.to_decoded_dataframe() assert ds.data.shape[0] == 5 diff --git a/workflows/wdl/iss_published/recipe.py b/workflows/wdl/iss_published/recipe.py index d92f4d8a8..56bd21d38 100644 --- a/workflows/wdl/iss_published/recipe.py +++ b/workflows/wdl/iss_published/recipe.py @@ -62,5 +62,5 @@ def process_fov(field_num: int, experiment_str: str): intensities = detector.run(filtered_imgs, blobs_image=dots, blobs_axes=(Axes.ROUND, Axes.ZPLANE)) decoded = experiment.codebook.decode_per_round_max(intensities) - df = decoded.to_decoded_spots() + df = decoded.to_decoded_dataframe() return df diff --git a/workflows/wdl/iss_spaceTX/recipe.py b/workflows/wdl/iss_spaceTX/recipe.py index d835a675f..bc8d16a4c 100644 --- a/workflows/wdl/iss_spaceTX/recipe.py +++ b/workflows/wdl/iss_spaceTX/recipe.py @@ -52,5 +52,5 @@ def process_fov(field_num: int, experiment_str: str): decoded = experiment.codebook.decode_per_round_max(intensities) # save results - df = decoded.to_decoded_spots() + df = decoded.to_decoded_dataframe() return df \ No newline at end of file