Skip to content

Commit

Permalink
Merge pull request #257 from JuliaKukulies/RC_v1.5.0
Browse files Browse the repository at this point in the history
Resolves conflicts and merges `main` into `RC_v1.5.0`
  • Loading branch information
JuliaKukulies authored Feb 23, 2023
2 parents ff7de02 + de63e7c commit 5ece4f5
Show file tree
Hide file tree
Showing 9 changed files with 175 additions and 77 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
### Tobac Changelog

_**Version 1.4.2:**_


**Bug fixes**

- Fixed a bug in the segmentation procedure that assigned the wrong grid cell areas (`ncells`) to features in the output data frame [#246](https://github.com/tobac-project/tobac/pull/246)

- Fixed a bug in feature_detection.filter_min_distance() that always selected the feature with the largest threshold, even if the feature detection is targeting minima. The target is now an optional input parameter for the distance filtering [#251](https://github.com/tobac-project/tobac/pull/251)

- Fixed an issue in the 2D coordinate interpolation that produced object dtypes in feature detection and made the feature input data frame incompatible with the merge and split function [#251](https://github.com/tobac-project/tobac/pull/251)


_**Version 1.4.1:**_

**Bug fixes**
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

23 changes: 16 additions & 7 deletions examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb

Large diffs are not rendered by default.

23 changes: 16 additions & 7 deletions examples/Example_Updraft_Tracking/Example_Updraft_Tracking.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tobac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@
from . import merge_split

# Set version number
__version__ = "1.4.1"
__version__ = "1.4.2"
53 changes: 20 additions & 33 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,7 @@ def feature_detection_multithreshold(
z_coordinate_name=vertical_coord,
target=target,
)

list_features_timesteps.append(features_thresholds)

logging.debug(
Expand Down Expand Up @@ -985,8 +986,6 @@ def filter_min_distance(
If two features are closer than `min_distance`, it keeps the
larger feature.
TODO: does this function work with minima?
Parameters
----------
features: pandas DataFrame
Expand Down Expand Up @@ -1014,6 +1013,10 @@ def filter_min_distance(
Flag to determine if tracking is targeting minima or maxima in
the data. Default is 'maximum'.
target : str {maximum | minimum}, optional
Whether the threshold target is a maximum or a minimum (defaults to
maximum)
Returns
-------
pandas DataFrame
Expand Down Expand Up @@ -1100,9 +1103,20 @@ def filter_min_distance(
)

if distance <= min_distance:
# print(distance, min_distance, index_1, index_2, features.size)
# logging.debug('distance<= min_distance: ' + str(distance))
if target == "maximum":
# If same threshold value, remove based on number of pixels
if (
features.loc[index_1, "threshold_value"]
== features.loc[index_2, "threshold_value"]
):
if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
elif features.loc[index_1, "num"] < features.loc[index_2, "num"]:
remove_list_distance.append(index_1)
# Tie break if both have the same number of pixels
elif features.loc[index_1, "num"] == features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
# Else remove based on comparison of thresholds and target
elif target == "maximum":
if (
features.loc[index_1, "threshold_value"]
> features.loc[index_2, "threshold_value"]
Expand All @@ -1113,20 +1127,7 @@ def filter_min_distance(
< features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_1)
elif (
features.loc[index_1, "threshold_value"]
== features.loc[index_2, "threshold_value"]
):
if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
elif (
features.loc[index_1, "num"] < features.loc[index_2, "num"]
):
remove_list_distance.append(index_1)
elif (
features.loc[index_1, "num"] == features.loc[index_2, "num"]
):
remove_list_distance.append(index_2)

elif target == "minimum":
if (
features.loc[index_1, "threshold_value"]
Expand All @@ -1138,20 +1139,6 @@ def filter_min_distance(
> features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_1)
elif (
features.loc[index_1, "threshold_value"]
== features.loc[index_2, "threshold_value"]
):
if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
elif (
features.loc[index_1, "num"] < features.loc[index_2, "num"]
):
remove_list_distance.append(index_1)
elif (
features.loc[index_1, "num"] == features.loc[index_2, "num"]
):
remove_list_distance.append(index_2)

features = features[~features.index.isin(remove_list_distance)]
return features
8 changes: 5 additions & 3 deletions tobac/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,14 +466,16 @@ def segmentation_timestep(
# Write resulting mask into cube for output
segmentation_out.data = segmentation_mask

# count number of grid cells asoociated to each tracked cell and write that into DataFrame:
# count number of grid cells associated to each tracked cell and write that into DataFrame:
values, count = np.unique(segmentation_mask, return_counts=True)
counts = dict(zip(values, count))
ncells = np.zeros(len(features_out))
for i, (index, row) in enumerate(features_out.iterrows()):
if row["feature"] in counts.keys():
ncells = counts[row["feature"]]
features_out["ncells"] = ncells
# assign a value for ncells for the respective feature in data frame
features_out.loc[features_out.feature == row["feature"], "ncells"] = counts[
row["feature"]
]

return segmentation_out, features_out

Expand Down
89 changes: 75 additions & 14 deletions tobac/tests/test_segmentation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import pytest
import tobac.testing as testing
import tobac.segmentation as seg
import numpy as np
from tobac import segmentation, feature_detection, testing


def test_segmentation_timestep_2D_feature_2D_seg():
Expand Down Expand Up @@ -40,7 +39,7 @@ def test_segmentation_timestep_2D_feature_2D_seg():
start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000
)

out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand Down Expand Up @@ -116,7 +115,7 @@ def test_segmentation_timestep_level():
start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000
)

out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand All @@ -143,7 +142,7 @@ def test_segmentation_timestep_level():
)

# now set specific levels
out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand Down Expand Up @@ -198,14 +197,9 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(
True if we expect both features to be segmented, false
if we don't expect them both to be segmented
"""

import numpy as np

"""
The best way to do this I think is to create two blobs near (but not touching)
each other, varying the seed_3D_size so that they are either segmented together
or not segmented together.
or not segmented together.
"""
test_dset_size = (20, 50, 50)
test_hdim_1_pt_1 = 20.0
Expand Down Expand Up @@ -250,7 +244,7 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(
max_h2=1000,
)

out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand Down Expand Up @@ -345,7 +339,7 @@ def test_different_z_axes(
max_h2=1000,
)
if not expected_raise:
out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand All @@ -358,9 +352,76 @@ def test_different_z_axes(
else:
# Expecting a raise
with pytest.raises(ValueError):
out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
threshold=1.5,
)


def test_segmentation_multiple_features():
    """Tests `tobac.segmentation.segmentation_timestep` with a 2D input
    containing multiple features with different areas.

    Tests specifically whether the area (ncells) of each feature is correctly
    calculated and assigned to the matching row of the output data frame, and
    that it agrees with the number of grid cells carrying that feature's ID
    in the segmentation mask.
    """
    test_dset_size = (50, 50)
    test_dxy = 1
    test_min_num = 2

    # first feature: small blob
    test_hdim_1_pt = 20.0
    test_hdim_2_pt = 20.0
    test_hdim_1_sz = 5
    test_hdim_2_sz = 5
    size_feature1 = test_hdim_1_sz * test_hdim_2_sz
    test_amp = 2

    test_data = np.zeros(test_dset_size)
    test_data = testing.make_feature_blob(
        test_data,
        test_hdim_1_pt,
        test_hdim_2_pt,
        h1_size=test_hdim_1_sz,
        h2_size=test_hdim_2_sz,
        amplitude=test_amp,
    )

    # add feature of different size
    test_hdim_1_pt = 40.0
    test_hdim_2_pt = 40.0
    test_hdim_1_sz = 10
    test_hdim_2_sz = 10
    size_feature2 = test_hdim_1_sz * test_hdim_2_sz
    test_amp = 10

    test_data = testing.make_feature_blob(
        test_data,
        test_hdim_1_pt,
        test_hdim_2_pt,
        h1_size=test_hdim_1_sz,
        h2_size=test_hdim_2_sz,
        amplitude=test_amp,
    )

    test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris")

    # detect both features
    fd_output = feature_detection.feature_detection_multithreshold_timestep(
        test_data_iris,
        i_time=0,
        dxy=test_dxy,
        threshold=[1, 2, 3],
        n_min_threshold=test_min_num,
        target="maximum",
    )

    # add feature IDs to data frame for one time step
    # NOTE(review): assumes exactly two features are detected and that the
    # smaller blob comes first in the data frame - confirm against
    # feature_detection_multithreshold_timestep ordering.
    fd_output["feature"] = [1, 2]

    # perform segmentation
    out_seg_mask, out_df = segmentation.segmentation_timestep(
        field_in=test_data_iris, features_in=fd_output, dxy=test_dxy, threshold=1.5
    )
    out_seg_mask_arr = out_seg_mask.core_data()

    # assure that the number of grid cells belonging to each feature (ncells)
    # matches the expected blob size and is consistent with segmentation mask
    for feature_id, expected_size in ((1, size_feature1), (2, size_feature2)):
        # .item() raises if the selection does not hold exactly one value, and
        # avoids the deprecated implicit ndarray -> int conversion
        ncells = out_df.loc[out_df.feature == feature_id, "ncells"].values.item()
        assert int(ncells) == expected_size
        assert np.sum(out_seg_mask_arr == feature_id) == ncells

0 comments on commit 5ece4f5

Please sign in to comment.