Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Intensity Table Concat Processing #1118

Merged
merged 23 commits into from
Apr 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions starfish/intensity_table/intensity_table.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from itertools import product
from json import loads
from typing import Dict, List, Union
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
Expand All @@ -13,10 +13,15 @@
DecodedSpots,
Features,
LOG,
OverlapStrategy,
SpotAttributes,
STARFISH_EXTRAS_KEY
)
from starfish.util.dtype import preserve_float_range
from starfish.util.overlap_utils import (
find_overlaps_of_xarrays,
OVERLAP_STRATEGY_MAP,
)


class IntensityTable(xr.DataArray):
Expand Down Expand Up @@ -400,10 +405,29 @@ def from_image_stack(
return IntensityTable.from_spot_data(intensity_data, pixel_coordinates)

@staticmethod
def concatanate_intensity_tables(intensity_tables: List["IntensityTable"]):
# TODO VARY CONCAT LOGIC IF TILES OVERLAP
# This method is a starting point for handling tile overlap, right now
# it does a simple concat but people want other overlap logic implemented
def process_overlaps(intensity_tables: List["IntensityTable"],
                     overlap_strategy: OverlapStrategy
                     ) -> List["IntensityTable"]:
    """Find the overlapping sections between IntensityTables and process them according
    to the given overlap strategy.

    Parameters
    ----------
    intensity_tables : List[IntensityTable]
        The tables to check for overlaps. The list itself is not modified.
    overlap_strategy : OverlapStrategy
        Key into OVERLAP_STRATEGY_MAP selecting the method applied to each overlapping pair.

    Returns
    -------
    List[IntensityTable] :
        A new list, in the same order as the input, in which each table that took part in
        an overlap has been replaced by the version returned by the overlap method.
    """
    # The strategy is the same for every pair, so resolve it once up front; a strategy that
    # is missing from the map then fails before any pair has been processed.
    overlap_method = OVERLAP_STRATEGY_MAP[overlap_strategy]
    # Work on a shallow copy so the list object owned by the caller is left untouched.
    processed = list(intensity_tables)
    for idx1, idx2 in find_overlaps_of_xarrays(processed):
        # swap both members of the pair for the versions produced by the overlap method
        processed[idx1], processed[idx2] = overlap_method(processed[idx1], processed[idx2])
    return processed

@staticmethod
def concatanate_intensity_tables(intensity_tables: List["IntensityTable"],
                                 overlap_strategy: Optional[OverlapStrategy] = None):
    """Concatenate IntensityTables along the features axis, optionally processing overlaps.

    Parameters
    ----------
    intensity_tables : List[IntensityTable]
        The tables to concatenate.
    overlap_strategy : Optional[OverlapStrategy]
        When given, the tables are first passed through IntensityTable.process_overlaps
        with this strategy. When None (the default) the tables are concatenated as they are.

    Returns
    -------
    The result of xr.concat over the (possibly processed) tables along Features.AXIS.
    """
    # Compare against None explicitly so the decision does not depend on the truthiness
    # of the strategy object.
    if overlap_strategy is not None:
        intensity_tables = IntensityTable.process_overlaps(intensity_tables, overlap_strategy)
    return xr.concat(intensity_tables, dim=Features.AXIS)

def to_features_dataframe(self) -> pd.DataFrame:
Expand Down
153 changes: 153 additions & 0 deletions starfish/test/test_overlap_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import numpy as np
import xarray as xr

from starfish import IntensityTable
from starfish.test import factories
from starfish.types import Coordinates, Features
from starfish.types._constants import OverlapStrategy
from starfish.util.overlap_utils import (
Area,
find_overlaps_of_xarrays,
remove_area_of_xarray,
sel_area_of_xarray
)


def create_intensity_table_with_coords(area: Area, n_spots: int = 10) -> IntensityTable:
    """
    Build a synthetic IntensityTable whose x/y physical coordinates span the given Area.

    The table comes from IntensityTable.synthetic_intensities (num_z=1, height=50, width=50).
    Its x and y coordinates are then overwritten with n_spots evenly spaced values running
    from the area's minimum to its maximum along each axis.

    Parameters
    ----------
    area: Area
        The area of physical space the IntensityTable should be defined over
    n_spots:
        Number of spots to add to the IntensityTable
    """
    synthetic_kwargs = dict(num_z=1, height=50, width=50, n_spots=n_spots)
    table = IntensityTable.synthetic_intensities(factories.codebook_array_factory(),
                                                 **synthetic_kwargs)
    # (coordinate, lower bound, upper bound) for each physical axis that gets overwritten
    axis_bounds = (
        (Coordinates.X, area.min_x, area.max_x),
        (Coordinates.Y, area.min_y, area.max_y),
    )
    for coord, lower, upper in axis_bounds:
        table[coord.value] = xr.DataArray(np.linspace(lower, upper, n_spots),
                                          dims=Features.AXIS)
    return table


def test_find_area_intersection():
    """
    Check Area.find_intersection for overlapping, disjoint and negative-coordinate areas
    """
    base = Area(min_x=0, max_x=2, min_y=0, max_y=2)

    # partial overlap: expect the rectangle running from (1, 1) to (2, 2)
    partial = Area(min_x=1, max_x=2, min_y=1, max_y=3)
    expected = Area(min_x=1, max_x=2, min_y=1, max_y=2)
    assert Area.find_intersection(base, partial) == expected

    # entirely up and to the right of the base area: nothing in common
    far_away = Area(min_x=3, max_x=5, min_y=3, max_y=5)
    assert Area.find_intersection(base, far_away) is None

    # shares the x range but sits above the base area: still nothing in common
    above = Area(min_x=0, max_x=5, min_y=3, max_y=5)
    assert Area.find_intersection(base, above) is None

    # same checks with an area that extends into negative x
    negative_base = Area(min_x=-1, max_x=1, min_y=0, max_y=2)
    shifted_right = Area(min_x=0, max_x=2, min_y=0, max_y=2)
    expected = Area(min_x=0, max_x=1, min_y=0, max_y=2)
    assert Area.find_intersection(negative_base, shifted_right) == expected

    # entirely to the left of the negative base area
    further_left = Area(min_x=-3, max_x=-2, min_y=0, max_y=2)
    assert Area.find_intersection(negative_base, further_left) is None


def test_find_overlaps_of_xarrays():
    """
    Build four IntensityTables with known physical extents and check that
    find_overlaps_of_xarrays reports exactly the expected index pairs
    """
    # physical extent of it0 .. it3, in list order
    extents = (
        dict(min_x=0, max_x=1, min_y=0, max_y=1),
        dict(min_x=.5, max_x=2, min_y=.5, max_y=1.5),
        dict(min_x=1.5, max_x=2.5, min_y=0, max_y=1),
        dict(min_x=0, max_x=1, min_y=1, max_y=2),
    )
    tables = [create_intensity_table_with_coords(Area(**extent)) for extent in extents]

    overlaps = find_overlaps_of_xarrays(tables)

    # should have 4 total overlaps
    assert len(overlaps) == 4
    expected_pairs = (
        (0, 1),  # overlap 1 between it0 and it1
        (1, 2),  # overlap 2 between it1 and it2
        (1, 3),  # overlap 3 between it1 and it3
        (0, 3),  # overlap 4 between it0 and it3 (their extents meet at y == 1)
    )
    for pair in expected_pairs:
        assert pair in overlaps


def test_remove_area_of_xarray():
    """
    Tests removing a section of an IntensityTable defined by its physical area
    """
    it = create_intensity_table_with_coords(Area(min_x=0, max_x=2,
                                                 min_y=0, max_y=2), n_spots=10)

    area = Area(min_x=1, max_x=2, min_y=1, max_y=3)
    # grab one x value and one y value from spots that lie inside the area to be removed;
    # the y value must be read from the Y coordinate so the second assertion below really
    # checks the y axis
    removed_x = it.where(it.xc > 1, drop=True)[Coordinates.X.value].data[0]
    removed_y = it.where(it.yc > 1, drop=True)[Coordinates.Y.value].data[3]

    it = remove_area_of_xarray(it, area)
    # assert coords from removed section are no longer in it
    assert not np.any(np.isclose(it[Coordinates.X.value], removed_x))
    assert not np.any(np.isclose(it[Coordinates.Y.value], removed_y))


def test_sel_area_of_xarray():
    """
    Tests selecting a section of an IntensityTable defined by its physical area
    """
    it = create_intensity_table_with_coords(Area(min_x=0, max_x=2,
                                                 min_y=0, max_y=2), n_spots=10)

    area = Area(min_x=1, max_x=2, min_y=1, max_y=3)
    it = sel_area_of_xarray(it, area)

    # Every remaining spot must lie inside the selected area on BOTH axes; the bounds are
    # read from `area` so the x and y checks cannot drift apart from the selection
    assert min(it[Coordinates.X.value]).data >= area.min_x
    assert max(it[Coordinates.X.value]).data <= area.max_x
    assert min(it[Coordinates.Y.value]).data >= area.min_y
    assert max(it[Coordinates.Y.value]).data <= area.max_y


def test_take_max():
"""
Create two overlapping IntensityTables with differing number of spots and verify that
by concatenating them with the TAKE_MAX strategy we only include spots in the overlapping
section from the IntensityTable that had the most.
"""
it1 = create_intensity_table_with_coords(Area(min_x=0, max_x=2,
min_y=0, max_y=2), n_spots=10)
it2 = create_intensity_table_with_coords(Area(min_x=1, max_x=2,
min_y=1, max_y=3), n_spots=20)

concatenated = IntensityTable.concatanate_intensity_tables(
[it1, it2], overlap_strategy=OverlapStrategy.TAKE_MAX)

# The overlap section hits half of the spots from each intensity table, 5 from it1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait what? if it hits 5 of the spots from it1, then shouldn't we get a total of 25 spots?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

both the sel and remove_area_of_xarray methods are inclusive...so we get one spot in the comparison count and the concatenation...maybe this is wrong?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would dump the table to make sure it is consistent with your understanding, though I suspect you are correct. :)

# and 10 from it2. it2 wins and the resulting concatenated table should have all the
# spots from it2 (20) and 6 from it1 (one of them on the border) for a total of 26 spots
assert concatenated.sizes[Features.AXIS] == 26
1 change: 1 addition & 0 deletions starfish/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
CORE_DEPENDENCIES,
Features,
LOG,
OverlapStrategy,
PHYSICAL_COORDINATE_DIMENSION,
PhysicalCoordinateTypes,
STARFISH_EXTRAS_KEY,
Expand Down
8 changes: 8 additions & 0 deletions starfish/types/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ class Features:
GENES = 'genes'


class OverlapStrategy(AugmentedEnum):
"""
Contains the options to use when processing physically overlapping IntensityTables
or ImageStacks.
"""
# Per the test_take_max docstring: only the spots from the IntensityTable that had the
# most spots in the overlapping section are kept for that section.
TAKE_MAX = 'take_max'


class Clip(AugmentedEnum):
"""
contains clipping options that determine how out-of-bounds values produced by filters are
Expand Down
Loading