Skip to content

Commit

Permalink
Merge pull request #242 from freemansw1/lat_lon_transform
Browse files Browse the repository at this point in the history
Adding ability to track on one dataset, segment on another
  • Loading branch information
freemansw1 authored Jul 10, 2023
2 parents d7a340d + c77396e commit 205f761
Show file tree
Hide file tree
Showing 16 changed files with 1,020 additions and 47 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check_notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Install tobac dependencies
run: |
conda install -c conda-forge --yes ffmpeg gcc jupyter pytables
conda install -c conda-forge --yes --file requirements.txt
conda install -c conda-forge --yes --file example_requirements.txt
- name: Install tobac
run: |
pip install .
Expand Down
Binary file added doc/images/sat_radar_combined.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ The project is currently being extended by several contributors to include addit
segmentation_parameters
segmentation_output
features_without_segmented_area
transform_segmentation

.. toctree::
:caption: Tracking
Expand Down
9 changes: 9 additions & 0 deletions doc/transform_segmentation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Track on one dataset, segment on another
----------------------------------------

*tobac* also has the capability to combine datasets through :doc:`segmentation`, which includes the ability to track on one dataset (e.g., gridded radar data) and run segmentation on a different dataset *on a different grid* (e.g., satellite data).

.. image:: images/sat_radar_combined.png
:width: 500 px

To do this, users should first run :doc:`feature_detection_overview` with a dataset that contains latitude and longitude coordinates, such that they appear in the output dataframe from Feature Detection. Next, use :func:`tobac.utils.transform_feature_points` to transform the feature dataframe into the new coordinate system. Finally, use the output from :func:`tobac.utils.transform_feature_points` to run segmentation with the new data. This can be done with both 2D and 3D feature detection and segmentation.
1 change: 1 addition & 0 deletions example_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ trackpy
jupyter
notebook
pytables
s3fs
arm_pyart

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,7 @@ def feature_detection_multithreshold(
min_distance=0,
feature_number_start=1,
PBC_flag="none",
vertical_coord="auto",
vertical_coord=None,
vertical_axis=None,
detect_subset=None,
wavelength_filtering=None,
Expand Down Expand Up @@ -1118,7 +1118,7 @@ def feature_detection_multithreshold(
'hdim_2' means that we are periodic along hdim2
'both' means that we are periodic along both horizontal dimensions
vertical_coord: str
Name of the vertical coordinate. If 'auto', tries to auto-detect.
Name of the vertical coordinate. If None, tries to auto-detect.
It looks for the coordinate or the dimension name corresponding
to the string.
vertical_axis: int or None.
Expand Down Expand Up @@ -1366,7 +1366,7 @@ def filter_min_distance(
This is typically `projection_y_coordinate`. Currently unused.
z_coordinate_name: str or None
The name of the z coordinate to calculate distance based on in meters.
This is typically `altitude`. If `auto`, tries to auto-detect.
This is typically `altitude`. If None, tries to auto-detect.
target: {'maximum', 'minimum'}, optional
Flag to determine if tracking is targetting minima or maxima in
the data. Default is 'maximum'.
Expand Down
9 changes: 5 additions & 4 deletions tobac/segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"""
import copy
import logging
import numpy as np

import skimage
import numpy as np
Expand Down Expand Up @@ -316,7 +317,7 @@ def segmentation_timestep(
level=None,
method="watershed",
max_distance=None,
vertical_coord="auto",
vertical_coord=None,
PBC_flag="none",
seed_3D_flag="column",
seed_3D_size=5,
Expand Down Expand Up @@ -360,7 +361,7 @@ def segmentation_timestep(
belonging to that cell. Default is None.
vertical_coord : str, optional
Vertical coordinate in 3D input data. If 'auto', input is checked for
Vertical coordinate in 3D input data. If None, input is checked for
one of {'z', 'model_level_number', 'altitude','geopotential_height'}
as a likely coordinate name
Expand Down Expand Up @@ -1087,7 +1088,7 @@ def segmentation(
level=None,
method="watershed",
max_distance=None,
vertical_coord="auto",
vertical_coord=None,
PBC_flag="none",
seed_3D_flag="column",
seed_3D_size=5,
Expand Down Expand Up @@ -1206,7 +1207,7 @@ def segmentation(

for i, field_i in enumerate(field_time):
time_i = field_i.coord("time").units.num2date(field_i.coord("time").points[0])
features_i = features.loc[features["time"] == time_i]
features_i = features.loc[features["time"] == np.datetime64(time_i)]
segmentation_out_i, features_out_i = segmentation_timestep(
field_i,
features_i,
Expand Down
14 changes: 7 additions & 7 deletions tobac/tests/test_feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,14 +453,14 @@ def test_filter_min_distance(
"vertical_coord_name,"
" vertical_coord_opt, expected_raise",
[
((1, 20, 30, 40), 1, "altitude", "auto", False),
((1, 20, 30, 40), 2, "altitude", "auto", False),
((1, 20, 30, 40), 3, "altitude", "auto", False),
((1, 20, 30, 40), 1, "altitude", None, False),
((1, 20, 30, 40), 2, "altitude", None, False),
((1, 20, 30, 40), 3, "altitude", None, False),
((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False),
((1, 20, 30, 40), 1, "air_pressure", "auto", True),
((1, 20, 30, 40), 1, "model_level_number", "auto", False),
((1, 20, 30, 40), 1, "altitude", "auto", False),
((1, 20, 30, 40), 1, "geopotential_height", "auto", False),
((1, 20, 30, 40), 1, "air_pressure", None, True),
((1, 20, 30, 40), 1, "model_level_number", None, False),
((1, 20, 30, 40), 1, "altitude", None, False),
((1, 20, 30, 40), 1, "geopotential_height", None, False),
],
)
def test_feature_detection_multiple_z_coords(
Expand Down
14 changes: 7 additions & 7 deletions tobac/tests/test_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,14 +471,14 @@ def test_segmentation_timestep_3d_seed_box_nopbcs(
"vertical_coord_name,"
" vertical_coord_opt, expected_raise",
[
((20, 30, 40), 0, "altitude", "auto", False),
((20, 30, 40), 1, "altitude", "auto", False),
((20, 30, 40), 2, "altitude", "auto", False),
((20, 30, 40), 0, "altitude", None, False),
((20, 30, 40), 1, "altitude", None, False),
((20, 30, 40), 2, "altitude", None, False),
((20, 30, 40), 0, "air_pressure", "air_pressure", False),
((20, 30, 40), 0, "air_pressure", "auto", True),
((20, 30, 40), 0, "model_level_number", "auto", False),
((20, 30, 40), 0, "altitude", "auto", False),
((20, 30, 40), 0, "geopotential_height", "auto", False),
((20, 30, 40), 0, "air_pressure", None, True),
((20, 30, 40), 0, "model_level_number", None, False),
((20, 30, 40), 0, "altitude", None, False),
((20, 30, 40), 0, "geopotential_height", None, False),
],
)
def test_different_z_axes(
Expand Down
138 changes: 135 additions & 3 deletions tobac/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import pandas.testing as pd_test
import numpy as np
from scipy import fft
import xarray as xr


def lists_equal_without_order(a, b):
Expand Down Expand Up @@ -265,11 +266,11 @@ def test_add_coordinates_3D(
@pytest.mark.parametrize(
"vertical_coord_names, vertical_coord_pass_in, expect_raise",
[
(["z"], "auto", False),
(["pudding"], "auto", True),
(["z"], None, False),
(["pudding"], None, True),
(["pudding"], "pudding", False),
(["z", "model_level_number"], "pudding", True),
(["z", "model_level_number"], "auto", True),
(["z", "model_level_number"], None, True),
(["z", "model_level_number"], "z", False),
],
)
Expand Down Expand Up @@ -398,3 +399,134 @@ def test_combine_tobac_feats():
)
assert np.all(list(combined_feat["old_feat_column"].values) == [1, 1])
assert np.all(list(combined_feat["feature"].values) == [1, 2])


def test_transform_feature_points():
"""Tests tobac.utils.general.transform_feature_points"""

# generate features
orig_feat_df_1 = tb_test.generate_single_feature(0, 95, max_h1=1000, max_h2=1000)
orig_feat_df_2 = tb_test.generate_single_feature(5, 105, max_h1=1000, max_h2=1000)

orig_feat_df = tb_utils.combine_tobac_feats([orig_feat_df_1, orig_feat_df_2])

# just make their lat/lons the same as the hdims.
orig_feat_df["latitude"] = orig_feat_df["hdim_1"]
orig_feat_df["longitude"] = orig_feat_df["hdim_2"]

# Make a test dataset with lats spanning from -25 to 24
# and lons spanning from 90 to 139.
test_lat = np.linspace(-25, 24, 50)
test_lon = np.linspace(90, 139, 50)
in_xr = xr.Dataset(
{"data": (("latitude", "longitude"), np.empty((50, 50)))},
coords={"latitude": test_lat, "longitude": test_lon},
)

new_feat_df = tb_utils.general.transform_feature_points(
orig_feat_df,
in_xr["data"].to_iris(),
max_time_away=datetime.timedelta(minutes=1),
max_space_away=20 * 1000,
)
# recall that these are the *array positions*
# so [25, 5] for "hdim_1" and "hdim_2" are lat 0, long 95.
assert np.all(new_feat_df["hdim_1"] == [25, 30])
assert np.all(new_feat_df["hdim_2"] == [5, 15])

# now test max space apart - we should drop the second feature,
# which is at 5, 105 lat/lon as the maximum latitude in the new dataset is 0.
# we set the max space away at 20km.
test_lat = np.linspace(-49, 0, 50)
in_xr = xr.Dataset(
{"data": (("latitude", "longitude"), np.empty((50, 50)))},
coords={"latitude": test_lat, "longitude": test_lon},
)

new_feat_df = tb_utils.general.transform_feature_points(
orig_feat_df,
in_xr["data"].to_iris(),
max_space_away=20000,
max_time_away=datetime.timedelta(minutes=1),
)

assert np.all(new_feat_df["hdim_1"] == [49])
assert np.all(new_feat_df["hdim_2"] == [5])

# now test max time apart
test_lat = np.linspace(-25, 24, 50)
in_xr = xr.Dataset(
{"data": (("time", "latitude", "longitude"), np.empty((2, 50, 50)))},
coords={
"latitude": test_lat,
"longitude": test_lon,
"time": [
datetime.datetime(2023, 1, 1, 0, 0),
datetime.datetime(2023, 1, 1, 0, 5),
],
},
)

orig_feat_df["time"] = datetime.datetime(2023, 1, 1, 0, 0, 5)
new_feat_df = tb_utils.general.transform_feature_points(
orig_feat_df,
in_xr["data"].to_iris(),
max_time_away=datetime.timedelta(minutes=10),
max_space_away=20 * 1000,
)
# we should still have both features, but they should have the new time.
assert np.all(new_feat_df["hdim_1"] == [25, 30])
assert np.all(new_feat_df["hdim_2"] == [5, 15])
assert np.all(
new_feat_df["time"]
== [datetime.datetime(2023, 1, 1, 0, 0), datetime.datetime(2023, 1, 1, 0, 0)]
)

# now make the features have time on the next day
# both should be dropped.
orig_feat_df["time"] = datetime.datetime(2023, 1, 2, 0, 0)
new_feat_df = tb_utils.general.transform_feature_points(
orig_feat_df,
in_xr["data"].to_iris(),
max_time_away=datetime.timedelta(minutes=1),
)

assert np.all(new_feat_df["hdim_1"] == [])
assert np.all(new_feat_df["hdim_2"] == [])


def test_transform_feature_points_3D():
"""Tests tobac.utils.general.transform_feature_points for a 3D case"""

orig_feat_df_1 = tb_test.generate_single_feature(
0, 95, 10, max_h1=1000, max_h2=1000
)
orig_feat_df_2 = tb_test.generate_single_feature(
5, 105, 20, max_h1=1000, max_h2=1000
)

orig_feat_df = tb_utils.combine_tobac_feats([orig_feat_df_1, orig_feat_df_2])

orig_feat_df["latitude"] = orig_feat_df["hdim_1"]
orig_feat_df["longitude"] = orig_feat_df["hdim_2"]
orig_feat_df["altitude"] = orig_feat_df["vdim"] * 1000

test_lat = np.linspace(-25, 24, 50)
test_lon = np.linspace(90, 139, 50)
test_alt = np.arange(0, 21, 2) * 1000
in_xr = xr.Dataset(
{"data": (("altitude", "latitude", "longitude"), np.empty((11, 50, 50)))},
coords={"latitude": test_lat, "longitude": test_lon, "altitude": test_alt},
)

new_feat_df = tb_utils.general.transform_feature_points(
orig_feat_df,
in_xr["data"].to_iris(),
max_time_away=datetime.timedelta(minutes=1),
max_space_away=20 * 1000,
max_vspace_away=200,
)

assert np.all(new_feat_df["hdim_1"] == [25, 30])
assert np.all(new_feat_df["hdim_2"] == [5, 15])
assert np.all(new_feat_df["vdim"] == [5, 10])
30 changes: 30 additions & 0 deletions tobac/tests/test_utils_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
import numpy as np
import xarray as xr


@pytest.mark.parametrize(
Expand Down Expand Up @@ -60,3 +61,32 @@ def test_find_hdim_axes_3D(dset_type, time_axis, vertical_axis, expected_out):
out_coords = internal_utils.find_hdim_axes_3D(cube_test)

assert out_coords == expected_out


@pytest.mark.parametrize(
"lat_name, lon_name, lat_name_test, lon_name_test, expected_result",
[
("lat", "lon", None, None, ("lat", "lon")),
("lat", "long", None, None, ("lat", "long")),
("lat", "altitude", None, None, ("lat", None)),
("lat", "longitude", "lat", "longitude", ("lat", "longitude")),
],
)
def test_detect_latlon_coord_name(
lat_name, lon_name, lat_name_test, lon_name_test, expected_result
):
"""Tests tobac.utils.internal.detect_latlon_coord_name"""

in_arr = np.empty((50, 50))
lat_vals = np.empty(50)
lon_vals = np.empty(50)

in_xr = xr.Dataset(
{"data": ((lat_name, lon_name), in_arr)},
coords={lat_name: lat_vals, lon_name: lon_vals},
)
out_lat_name, out_lon_name = internal_utils.detect_latlon_coord_name(
in_xr["data"].to_iris(), lat_name_test, lon_name_test
)
assert out_lat_name == expected_result[0]
assert out_lon_name == expected_result[1]
2 changes: 1 addition & 1 deletion tobac/tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def linking_trackpy(
vertical_coord: str
Name of the vertical coordinate. The vertical coordinate used
must be meters. If 'auto', tries to auto-detect.
must be meters. If None, tries to auto-detect.
It looks for the coordinate or the dimension name corresponding
to the string. To use `dz`, set this to `None`.
Expand Down
1 change: 1 addition & 0 deletions tobac/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
get_spacings,
get_bounding_box,
combine_tobac_feats,
transform_feature_points,
)
from .mask import (
mask_cell,
Expand Down
Loading

0 comments on commit 205f761

Please sign in to comment.