Merge pull request #257 from JuliaKukulies/RC_v1.5.0

Resolves conflicts and merges `main` into `RC_v1.5.0`
tobac-project · Feb 23, 2023 · 5ece4f5 · 5ece4f5
2 parents ff7de02 + de63e7c
commit 5ece4f5
Show file tree

Hide file tree

Showing 9 changed files with 175 additions and 77 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,17 @@
 ### Tobac Changelog
 
+_**Version 1.4.2:**_
+
+
+**Bug fixes**
+
+- Fixed a bug in the segmentation procedure that assigned the wrong grid cell areas to features in the data frame [#246](https://github.com/tobac-project/tobac/pull/246)
+
+- Fixed a bug in feature_detection.filter_min_distance() that always selected the feature with the largest threshold, even if the feature detection is targeting minima. The target is now an optional input parameter for the distance filtering [#251](https://github.com/tobac-project/tobac/pull/251)
+
+- Fixed an issue in the 2D coordinate interpolation that produced object dtypes in feature detection and made the feature input data frame incompatible with the merge and split function [#251](https://github.com/tobac-project/tobac/pull/251)
+
+
 _**Version 1.4.1:**_
 
 **Bug fixes**

diff --git a/examples/Example_OLR_Tracking_model/Example_OLR_Tracking_model.ipynb b/examples/Example_OLR_Tracking_model/Example_OLR_Tracking_model.ipynb
diff --git a/examples/Example_OLR_Tracking_satellite/Example_OLR_Tracking_satellite.ipynb b/examples/Example_OLR_Tracking_satellite/Example_OLR_Tracking_satellite.ipynb
diff --git a/examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb b/examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb
diff --git a/examples/Example_Updraft_Tracking/Example_Updraft_Tracking.ipynb b/examples/Example_Updraft_Tracking/Example_Updraft_Tracking.ipynb
diff --git a/tobac/__init__.py b/tobac/__init__.py
@@ -77,4 +77,4 @@
 from . import merge_split
 
 # Set version number
-__version__ = "1.4.1"
+__version__ = "1.4.2"
diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
@@ -944,6 +944,7 @@ def feature_detection_multithreshold(
                     z_coordinate_name=vertical_coord,
                     target=target,
                 )
+
         list_features_timesteps.append(features_thresholds)
 
         logging.debug(
@@ -985,8 +986,6 @@ def filter_min_distance(
     If two features are closer than `min_distance`, it keeps the
     larger feature.
 
-    TODO: does this function work with minima?
-
     Parameters
     ----------
     features:      pandas DataFrame
@@ -1014,6 +1013,10 @@ def filter_min_distance(
         Flag to determine if tracking is targeting minima or maxima in
         the data. Default is 'maximum'.
 
+    target : str {maximum | minimum}, optional
+        Whether the threshold target is a maximum or a minimum (defaults to
+        maximum)
+
     Returns
     -------
     pandas DataFrame
@@ -1100,9 +1103,20 @@ def filter_min_distance(
             )
 
             if distance <= min_distance:
-                # print(distance, min_distance, index_1, index_2, features.size)
-                #                        logging.debug('distance<= min_distance: ' + str(distance))
-                if target == "maximum":
+                # If same threshold value, remove based on number of pixels
+                if (
+                    features.loc[index_1, "threshold_value"]
+                    == features.loc[index_2, "threshold_value"]
+                ):
+                    if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
+                        remove_list_distance.append(index_2)
+                    elif features.loc[index_1, "num"] < features.loc[index_2, "num"]:
+                        remove_list_distance.append(index_1)
+                    # Tie break if both have the same number of pixels
+                    elif features.loc[index_1, "num"] == features.loc[index_2, "num"]:
+                        remove_list_distance.append(index_2)
+                # Else remove based on comparison of thresholds and target
+                elif target == "maximum":
                     if (
                         features.loc[index_1, "threshold_value"]
                         > features.loc[index_2, "threshold_value"]
@@ -1113,20 +1127,7 @@ def filter_min_distance(
                         < features.loc[index_2, "threshold_value"]
                     ):
                         remove_list_distance.append(index_1)
-                    elif (
-                        features.loc[index_1, "threshold_value"]
-                        == features.loc[index_2, "threshold_value"]
-                    ):
-                        if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
-                            remove_list_distance.append(index_2)
-                        elif (
-                            features.loc[index_1, "num"] < features.loc[index_2, "num"]
-                        ):
-                            remove_list_distance.append(index_1)
-                        elif (
-                            features.loc[index_1, "num"] == features.loc[index_2, "num"]
-                        ):
-                            remove_list_distance.append(index_2)
+
                 elif target == "minimum":
                     if (
                         features.loc[index_1, "threshold_value"]
@@ -1138,20 +1139,6 @@ def filter_min_distance(
                         > features.loc[index_2, "threshold_value"]
                     ):
                         remove_list_distance.append(index_1)
-                    elif (
-                        features.loc[index_1, "threshold_value"]
-                        == features.loc[index_2, "threshold_value"]
-                    ):
-                        if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
-                            remove_list_distance.append(index_2)
-                        elif (
-                            features.loc[index_1, "num"] < features.loc[index_2, "num"]
-                        ):
-                            remove_list_distance.append(index_1)
-                        elif (
-                            features.loc[index_1, "num"] == features.loc[index_2, "num"]
-                        ):
-                            remove_list_distance.append(index_2)
 
     features = features[~features.index.isin(remove_list_distance)]
     return features
diff --git a/tobac/segmentation.py b/tobac/segmentation.py
@@ -466,14 +466,16 @@ def segmentation_timestep(
     # Write resulting mask into cube for output
     segmentation_out.data = segmentation_mask
 
-    # count number of grid cells asoociated to each tracked cell and write that into DataFrame:
+    # count number of grid cells associated to each tracked cell and write that into DataFrame:
     values, count = np.unique(segmentation_mask, return_counts=True)
     counts = dict(zip(values, count))
     ncells = np.zeros(len(features_out))
     for i, (index, row) in enumerate(features_out.iterrows()):
         if row["feature"] in counts.keys():
-            ncells = counts[row["feature"]]
-    features_out["ncells"] = ncells
+            # assign a value for ncells for the respective feature in data frame
+            features_out.loc[features_out.feature == row["feature"], "ncells"] = counts[
+                row["feature"]
+            ]
 
     return segmentation_out, features_out
 

diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/test_segmentation.py
@@ -1,7 +1,6 @@
 import pytest
-import tobac.testing as testing
-import tobac.segmentation as seg
 import numpy as np
+from tobac import segmentation, feature_detection, testing
 
 
 def test_segmentation_timestep_2D_feature_2D_seg():
@@ -40,7 +39,7 @@ def test_segmentation_timestep_2D_feature_2D_seg():
         start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000
     )
 
-    out_seg_mask, out_df = seg.segmentation_timestep(
+    out_seg_mask, out_df = segmentation.segmentation_timestep(
         field_in=test_data_iris,
         features_in=test_feature_ds,
         dxy=test_dxy,
@@ -116,7 +115,7 @@ def test_segmentation_timestep_level():
         start_h1=20.0, start_h2=20.0, max_h1=1000, max_h2=1000
     )
 
-    out_seg_mask, out_df = seg.segmentation_timestep(
+    out_seg_mask, out_df = segmentation.segmentation_timestep(
         field_in=test_data_iris,
         features_in=test_feature_ds,
         dxy=test_dxy,
@@ -143,7 +142,7 @@ def test_segmentation_timestep_level():
     )
 
     # now set specific levels
-    out_seg_mask, out_df = seg.segmentation_timestep(
+    out_seg_mask, out_df = segmentation.segmentation_timestep(
         field_in=test_data_iris,
         features_in=test_feature_ds,
         dxy=test_dxy,
@@ -198,14 +197,9 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(
         True if we expect both features to be segmented, false
         if we don't expect them both to be segmented
 
-    """
-
-    import numpy as np
-
-    """
     The best way to do this I think is to create two blobs near (but not touching)
     each other, varying the seed_3D_size so that they are either segmented together
-    or not segmented together. 
+    or not segmented together.
     """
     test_dset_size = (20, 50, 50)
     test_hdim_1_pt_1 = 20.0
@@ -250,7 +244,7 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(
         max_h2=1000,
     )
 
-    out_seg_mask, out_df = seg.segmentation_timestep(
+    out_seg_mask, out_df = segmentation.segmentation_timestep(
         field_in=test_data_iris,
         features_in=test_feature_ds,
         dxy=test_dxy,
@@ -345,7 +339,7 @@ def test_different_z_axes(
         max_h2=1000,
     )
     if not expected_raise:
-        out_seg_mask, out_df = seg.segmentation_timestep(
+        out_seg_mask, out_df = segmentation.segmentation_timestep(
             field_in=test_data_iris,
             features_in=test_feature_ds,
             dxy=test_dxy,
@@ -358,9 +352,76 @@ def test_different_z_axes(
     else:
         # Expecting a raise
         with pytest.raises(ValueError):
-            out_seg_mask, out_df = seg.segmentation_timestep(
+            out_seg_mask, out_df = segmentation.segmentation_timestep(
                 field_in=test_data_iris,
                 features_in=test_feature_ds,
                 dxy=test_dxy,
                 threshold=1.5,
             )
+
+
+def test_segmentation_multiple_features():
+    """Tests `tobac.segmentation.segmentation_timestep` with a 2D input containing multiple features with different areas.
+    Tests specifically whether their area (ncells) is correctly calculated and assigned to the different features.
+    """
+    test_dset_size = (50, 50)
+    test_hdim_1_pt = 20.0
+    test_hdim_2_pt = 20.0
+    test_hdim_1_sz = 5
+    test_hdim_2_sz = 5
+    size_feature1 = test_hdim_1_sz * test_hdim_2_sz
+    test_amp = 2
+    test_min_num = 2
+
+    test_data = np.zeros(test_dset_size)
+    test_data = testing.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        amplitude=test_amp,
+    )
+
+    # add feature of different size
+    test_hdim_1_pt = 40.0
+    test_hdim_2_pt = 40.0
+    test_hdim_1_sz = 10
+    test_hdim_2_sz = 10
+    size_feature2 = test_hdim_1_sz * test_hdim_2_sz
+    test_amp = 10
+    test_dxy = 1
+
+    test_data = testing.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        amplitude=test_amp,
+    )
+
+    test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris")
+
+    # detect both features
+    fd_output = feature_detection.feature_detection_multithreshold_timestep(
+        test_data_iris,
+        i_time=0,
+        dxy=1,
+        threshold=[1, 2, 3],
+        n_min_threshold=test_min_num,
+        target="maximum",
+    )
+
+    # add feature IDs to data frame for one time step
+    fd_output["feature"] = [1, 2]
+
+    # perform segmentation
+    out_seg_mask, out_df = segmentation.segmentation_timestep(
+        field_in=test_data_iris, features_in=fd_output, dxy=test_dxy, threshold=1.5
+    )
+    out_seg_mask_arr = out_seg_mask.core_data()
+
+    # ensure that the number of grid cells belonging to each feature (ncells) is consistent with the segmentation mask
+    assert int(out_df[out_df.feature == 1].ncells.values) == size_feature1
+    assert int(out_df[out_df.feature == 2].ncells.values) == size_feature2