Skip to content

Commit

Permalink
Merge pull request #256 from tobac-project/hotfix
Browse files Browse the repository at this point in the history
Merge `hotfix` into `main` for v1.4.2
  • Loading branch information
JuliaKukulies authored Feb 21, 2023
2 parents 09b176b + 943458e commit c906eb0
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 56 deletions.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,17 @@
### Tobac Changelog

_**Version 1.4.2:**_


**Bug fix**

- Fixed a bug in the segmentation procedure that assigned the wrong grid cell areas to features in the data frame [#246](https://github.com/tobac-project/tobac/pull/246)

- Fixed a bug in feature_detection.filter_min_distance() that always selected the feature with the largest threshold, even if the feature detection is targeting minima. The target is now an optional input parameter for the distance filtering [#251](https://github.com/tobac-project/tobac/pull/251)

- Fixed an issue in the 2D coordinate interpolation that produced object dtypes in feature detection and made the feature input data frame incompatible with the merge and split function [#251](https://github.com/tobac-project/tobac/pull/251)


_**Version 1.4.1:**_

**Bug fixes**
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

23 changes: 16 additions & 7 deletions examples/Example_Precip_Tracking/Example_Precip_Tracking.ipynb

Large diffs are not rendered by default.

23 changes: 16 additions & 7 deletions examples/Example_Updraft_Tracking/Example_Updraft_Tracking.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tobac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@
from . import merge_split

# Set version number
__version__ = "1.4.1"
__version__ = "1.4.2"
51 changes: 38 additions & 13 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ def feature_detection_multithreshold(
# Loop over DataFrame to remove features that are closer than distance_min to each other:
if min_distance > 0:
features_thresholds = filter_min_distance(
features_thresholds, dxy, min_distance
features_thresholds, dxy, min_distance, target=target
)
list_features_timesteps.append(features_thresholds)

Expand All @@ -701,7 +701,7 @@ def feature_detection_multithreshold(
return features


def filter_min_distance(features, dxy, min_distance):
def filter_min_distance(features, dxy, min_distance, target="maximum"):
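"""Remove features that are closer than min_distance to each other.
Of two features that are too close, one is kept based on the threshold value (relative to the target) and the number of pixels.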
Expand All @@ -716,6 +716,10 @@ def filter_min_distance(features, dxy, min_distance):
min_distance : float, optional
Minimum distance (in meter) between detected features.
target : str {maximum | minimum}, optional
Whether the threshold target is a maximum or a minimum (defaults to
maximum)
Returns
-------
features : pandas.DataFrame
Expand All @@ -724,6 +728,11 @@ def filter_min_distance(features, dxy, min_distance):

from itertools import combinations

if target not in ["minimum", "maximum"]:
raise ValueError(
"target parameter must be set to either 'minimum' or 'maximum'"
)

remove_list_distance = []
# create list of tuples with all combinations of features at the timestep:
indices = combinations(features.index.values, 2)
Expand All @@ -737,26 +746,42 @@ def filter_min_distance(features, dxy, min_distance):
** 2
)
if distance <= min_distance:
# logging.debug('distance<= min_distance: ' + str(distance))
# If same threshold value, remove based on number of pixels
if (
features.loc[index_1, "threshold_value"]
> features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_2)
elif (
features.loc[index_1, "threshold_value"]
< features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_1)
elif (
features.loc[index_1, "threshold_value"]
== features.loc[index_2, "threshold_value"]
):
if features.loc[index_1, "num"] > features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
elif features.loc[index_1, "num"] < features.loc[index_2, "num"]:
remove_list_distance.append(index_1)
# Tie break if both have the same number of pixels
elif features.loc[index_1, "num"] == features.loc[index_2, "num"]:
remove_list_distance.append(index_2)
# Else remove based on comparison of thresholds and target
elif target == "maximum":
if (
features.loc[index_1, "threshold_value"]
> features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_2)
elif (
features.loc[index_1, "threshold_value"]
< features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_1)

elif target == "minimum":
if (
features.loc[index_1, "threshold_value"]
< features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_2)
elif (
features.loc[index_1, "threshold_value"]
> features.loc[index_2, "threshold_value"]
):
remove_list_distance.append(index_1)

features = features[~features.index.isin(remove_list_distance)]
return features
8 changes: 5 additions & 3 deletions tobac/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,16 @@ def segmentation_timestep(
# Write resulting mask into cube for output
segmentation_out.data = segmentation_mask

# count number of grid cells asoociated to each tracked cell and write that into DataFrame:
# count number of grid cells associated to each tracked cell and write that into DataFrame:
values, count = np.unique(segmentation_mask, return_counts=True)
counts = dict(zip(values, count))
ncells = np.zeros(len(features_out))
for i, (index, row) in enumerate(features_out.iterrows()):
if row["feature"] in counts.keys():
ncells = counts[row["feature"]]
features_out["ncells"] = ncells
# assign a value for ncells for the respective feature in data frame
features_out.loc[features_out.feature == row["feature"], "ncells"] = counts[
row["feature"]
]

return segmentation_out, features_out

Expand Down
24 changes: 18 additions & 6 deletions tobac/tests/test_feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ def test_filter_min_distance(test_threshs, min_distance, dxy):
## add another blob with smaller value
test_hdim_1_pt2 = 25.0
test_hdim_2_pt2 = 25.0
test_hdim_1_sz2 = 2
test_hdim_2_sz2 = 2
test_hdim_1_sz2 = 3
test_hdim_2_sz2 = 3
test_amp2 = 3
test_data = tbtest.make_feature_blob(
test_data,
Expand All @@ -107,7 +107,9 @@ def test_filter_min_distance(test_threshs, min_distance, dxy):
)

# check that the distance filter works as expected
fd_filtered = feat_detect.filter_min_distance(fd_output, dxy, min_distance)
fd_filtered = feat_detect.filter_min_distance(
fd_output, dxy, min_distance, target="maximum"
)

# Make sure we have only one feature (small feature in minimum distance should be removed )
assert len(fd_output.index) == 2
Expand All @@ -116,6 +118,18 @@ def test_filter_min_distance(test_threshs, min_distance, dxy):
assert fd_filtered.iloc[0]["hdim_1"] == pytest.approx(test_hdim_1_pt)
assert fd_filtered.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt)

# check that the distance filter works as expected when targeting minima
fd_filtered = feat_detect.filter_min_distance(
fd_output, dxy, min_distance, target="minimum"
)

# Make sure we have only one feature (the first feature, within the minimum distance, should be removed)
assert len(fd_output.index) == 2
assert len(fd_filtered.index) == 1
# Make sure that the location of the remaining feature is correct (should correspond to the location of the second feature)
assert fd_filtered.iloc[0]["hdim_1"] == pytest.approx(test_hdim_1_pt2)
assert fd_filtered.iloc[0]["hdim_2"] == pytest.approx(test_hdim_2_pt2)


@pytest.mark.parametrize(
"position_threshold", [("center"), ("extreme"), ("weighted_diff"), ("weighted_abs")]
Expand All @@ -130,9 +144,7 @@ def test_feature_detection_position(position_threshold):
test_data = np.zeros(test_dset_size)

test_data[0:5, 0:5] = 3
test_threshs = [
1.5,
]
test_threshs = [1.5]
test_min_num = 2

test_data_iris = tbtest.make_dataset_from_arr(test_data, data_type="iris")
Expand Down
76 changes: 71 additions & 5 deletions tobac/tests/test_segmentation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import tobac.testing as testing
import tobac.segmentation as seg
import numpy as np
from tobac import segmentation, feature_detection, testing


def test_segmentation_timestep_level():
Expand All @@ -9,7 +9,6 @@ def test_segmentation_timestep_level():
# Before we can run segmentation, we must run feature detection.

# start by building a simple dataset with a single feature
import numpy as np

test_dset_size = (20, 50, 50)
test_hdim_1_pt = 20.0
Expand Down Expand Up @@ -61,7 +60,7 @@ def test_segmentation_timestep_level():
# Generate dummy feature dataset
test_feature_ds = testing.generate_single_feature(start_h1=20.0, start_h2=20.0)

out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand All @@ -87,7 +86,7 @@ def test_segmentation_timestep_level():
)

# now set specific levels
out_seg_mask, out_df = seg.segmentation_timestep(
out_seg_mask, out_df = segmentation.segmentation_timestep(
field_in=test_data_iris,
features_in=test_feature_ds,
dxy=test_dxy,
Expand All @@ -112,3 +111,70 @@ def test_segmentation_timestep_level():
]
== np.zeros((test_vdim_sz, test_hdim_1_sz, test_hdim_2_sz))
)


def test_segmentation_multiple_features():
    """Tests `tobac.segmentation.segmentation_timestep` with a 2D input containing multiple features with different areas.
    Tests specifically whether their area (ncells) is correctly calculated and assigned to the different features.
    """
    test_dset_size = (50, 50)
    test_dxy = 1
    test_min_num = 2

    # first feature: small blob with low amplitude
    feature1_hdim_1_pt = 20.0
    feature1_hdim_2_pt = 20.0
    feature1_hdim_1_sz = 5
    feature1_hdim_2_sz = 5
    feature1_amp = 2
    size_feature1 = feature1_hdim_1_sz * feature1_hdim_2_sz

    # second feature: larger blob with higher amplitude, so that the two
    # features differ in area
    feature2_hdim_1_pt = 40.0
    feature2_hdim_2_pt = 40.0
    feature2_hdim_1_sz = 10
    feature2_hdim_2_sz = 10
    feature2_amp = 10
    size_feature2 = feature2_hdim_1_sz * feature2_hdim_2_sz

    test_data = np.zeros(test_dset_size)
    test_data = testing.make_feature_blob(
        test_data,
        feature1_hdim_1_pt,
        feature1_hdim_2_pt,
        h1_size=feature1_hdim_1_sz,
        h2_size=feature1_hdim_2_sz,
        amplitude=feature1_amp,
    )
    test_data = testing.make_feature_blob(
        test_data,
        feature2_hdim_1_pt,
        feature2_hdim_2_pt,
        h1_size=feature2_hdim_1_sz,
        h2_size=feature2_hdim_2_sz,
        amplitude=feature2_amp,
    )

    test_data_iris = testing.make_dataset_from_arr(test_data, data_type="iris")

    # detect both features
    fd_output = feature_detection.feature_detection_multithreshold_timestep(
        test_data_iris,
        i_time=0,
        dxy=test_dxy,
        threshold=[1, 2, 3],
        n_min_threshold=test_min_num,
        target="maximum",
    )

    # add feature IDs to data frame for one time step
    fd_output["feature"] = [1, 2]

    # perform segmentation
    out_seg_mask, out_df = segmentation.segmentation_timestep(
        field_in=test_data_iris, features_in=fd_output, dxy=test_dxy, threshold=1.5
    )
    out_seg_mask_arr = out_seg_mask.core_data()

    # assure that the number of grid cells belonging to each feature (ncells)
    # matches the expected area and is consistent with the segmentation mask
    for feature_id, expected_size in ((1, size_feature1), (2, size_feature2)):
        ncells = out_df.loc[out_df.feature == feature_id, "ncells"].values
        # exactly one row per feature ID is expected
        assert ncells.size == 1
        # .item() extracts the scalar explicitly; int() on a 1-element array
        # is deprecated in recent NumPy versions
        assert int(ncells.item()) == expected_size
        assert np.count_nonzero(out_seg_mask_arr == feature_id) == expected_size
8 changes: 6 additions & 2 deletions tobac/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,11 +557,15 @@ def add_coordinates(t, variable_cube):

if variable_cube.coord_dims(coord) == (hdim_1, hdim_2):
f = interp2d(dimvec_2, dimvec_1, variable_cube.coord(coord).points)
coordinate_points = [f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])]
coordinate_points = np.asarray(
[f(a, b) for a, b in zip(t["hdim_2"], t["hdim_1"])]
)

if variable_cube.coord_dims(coord) == (hdim_2, hdim_1):
f = interp2d(dimvec_1, dimvec_2, variable_cube.coord(coord).points)
coordinate_points = [f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])]
coordinate_points = np.asarray(
[f(a, b) for a, b in zip(t["hdim_1"], t["hdim_2"])]
)

# interpolate 3D coordinates:
# mainly workaround for wrf latitude and longitude (to be fixed in future)
Expand Down

0 comments on commit c906eb0

Please sign in to comment.