From 4cc03b10608f5fd2235843f85d5040bca72a905a Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Thu, 17 Oct 2024 07:28:55 -0500 Subject: [PATCH 01/28] add velocity generation file --- pyproject.toml | 1 + src/opera_disp_tms/generate_sw_vel_tile.py | 0 2 files changed, 1 insertion(+) create mode 100644 src/opera_disp_tms/generate_sw_vel_tile.py diff --git a/pyproject.toml b/pyproject.toml index f73a7c8..8a5d15d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ Documentation = "https://github.com/ASFHyP3/opera-disp-tms/tree/develop?tab=read [project.scripts] generate_metadata_tile = "opera_disp_tms.generate_metadata_tile:main" generate_sw_disp_tile = "opera_disp_tms.generate_sw_disp_tile:main" +generate_sw_vel_tile = "opera_disp_tms.generate_sw_vel_tile:main" get_tmp_s3_creds = "opera_disp_tms.tmp_s3_access:main" create_tile_map = "opera_disp_tms.create_tile_map:main" diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py new file mode 100644 index 0000000..e69de29 From 53c4088c7f481baf357ba7ddc0a2e18e4ead03aa Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Thu, 17 Oct 2024 08:27:27 -0500 Subject: [PATCH 02/28] add basics for velocity --- src/opera_disp_tms/generate_sw_disp_tile.py | 82 ++++++++++++++------- src/opera_disp_tms/generate_sw_vel_tile.py | 79 ++++++++++++++++++++ 2 files changed, 133 insertions(+), 28 deletions(-) diff --git a/src/opera_disp_tms/generate_sw_disp_tile.py b/src/opera_disp_tms/generate_sw_disp_tile.py index 56bae68..99ed1e4 100644 --- a/src/opera_disp_tms/generate_sw_disp_tile.py +++ b/src/opera_disp_tms/generate_sw_disp_tile.py @@ -6,6 +6,7 @@ from typing import Iterable, Tuple import numpy as np +import xarray as xr from osgeo import gdal from rasterio.transform import Affine @@ -60,7 +61,9 @@ def frames_from_metadata(metadata_path: Path) -> dict[int, FrameMeta]: return frames -def find_needed_granules(frame_ids: Iterable[int], begin_date: datetime, end_date: datetime) -> list[Granule]: +def find_needed_granules( + frame_ids: Iterable[int], begin_date: datetime, end_date: datetime, strategy: str +) -> dict[int, Granule]: """Find the granules needed to generate a short wavelength displacement tile. For each `frame_id` the most recent granule whose secondary date is between `begin_date` and `end_date` is selected. 
@@ -69,23 +72,38 @@ def find_needed_granules(frame_ids: Iterable[int], begin_date: datetime, end_dat
         frame_ids: The frame ids to generate the tile for
         begin_date: Start of secondary date search range to generate tile for
         end_date: End of secondary date search range to generate tile for
+        strategy: Selection strategy for granules within search range ("minmax" or "all")
+            - Use "max" to get the most recent granule
+            - Use "minmax" to get first and last granules
+            - Use "all" to get all granules
 
     Returns:
-        A list of granules needed to generate the tile
+        A dictionary with form {frame_id: [granules]}
     """
     cali_dataset = find_california_dataset()
-    needed_granules = []
+    needed_granules = {}
     for frame_id in frame_ids:
         granules = [g for g in cali_dataset if g.frame_id == frame_id and begin_date <= g.secondary_date <= end_date]
-        if not granules:
-            warnings.warn(f'No granules found for frame {frame_id} between {begin_date} and {end_date}.')
+        if len(granules) < 2:
+            warnings.warn(f'Fewer than two granules found for frame {frame_id} between {begin_date} and {end_date}.')
+        elif strategy == 'max':
+            oldest_granule = max(granules, key=lambda x: x.secondary_date)
+            needed_granules[frame_id] = [oldest_granule]
+        elif strategy == 'minmax':
+            youngest_granule = min(granules, key=lambda x: x.secondary_date)
+            oldest_granule = max(granules, key=lambda x: x.secondary_date)
+            needed_granules[frame_id] = [youngest_granule, oldest_granule]
+        elif strategy == 'all':
+            needed_granules[frame_id] = granules
         else:
-            granule = max(granules, key=lambda x: x.secondary_date)
-            needed_granules.append(granule)
+            raise ValueError(f'Invalid strategy: {strategy}. Must be "max", "minmax", or "all".')
+
     return needed_granules
 
 
-def create_product_name(metadata_name: str, begin_date: datetime, end_date: datetime) -> str:
+def create_product_name(
+    metadata_name: str, begin_date: datetime, end_date: datetime, prod_type: str = 'SW_CUMUL_DISP'
+) -> str:
     """Create a product name for a short wavelength cumulative displacement tile
     Takes the form: SW_CUMUL_DISP_YYYYMMDD_YYYYMMDD_DIRECTION_TILECOORDS.tif
 
@@ -93,15 +111,31 @@ def create_product_name(metadata_name: str, begin_date: datetime, end_date: date
         metadata_name: The name of the metadata file
         begin_date: Start of secondary date search range to generate tile for
         end_date: End of secondary date search range to generate tile for
+        prod_type: Product type prefix to use
     """
     _, flight_direction, tile_coordinates = metadata_name.split('_')
     date_fmt = '%Y%m%d'
     begin_date = datetime.strftime(begin_date, date_fmt)
     end_date = datetime.strftime(end_date, date_fmt)
-    name = '_'.join(['SW_CUMUL_DISP', begin_date, end_date, flight_direction, tile_coordinates])
+    name = '_'.join([prod_type, begin_date, end_date, flight_direction, tile_coordinates])
     return name
 
 
+def load_sw_disp_granule(granule: Granule, bbox: Iterable[int], frame: FrameMeta) -> xr.DataArray:
+    datasets = ['short_wavelength_displacement', 'connected_component_labels']
+    granule_xr = open_opera_disp_granule(granule.s3_uri, datasets)
+
+    same_ref_date = round_to_day(granule_xr.attrs['reference_date']) == round_to_day(frame.reference_date)
+    if not same_ref_date:
+        raise NotImplementedError('Granule reference date does not match metadata tile, this is not supported.')
+
+    granule_xr = granule_xr.rio.clip_box(*bbox, crs='EPSG:3857')
+    valid_data_mask = granule_xr['connected_component_labels'] > 0
+    sw_cumul_disp_xr = granule_xr['short_wavelength_displacement'].where(valid_data_mask, np.nan)
+    sw_cumul_disp_xr.attrs = granule_xr.attrs
+    return
sw_cumul_disp_xr + + def add_granule_data_to_array( granule: Granule, frame: FrameMeta, frame_map: np.ndarray, geotransform: Affine, sw_cumul_disp: np.ndarray ) -> Tuple[np.ndarray, datetime]: @@ -117,23 +151,14 @@ def add_granule_data_to_array( Returns: The updated short wavelength cumulative displacement array and the secondary date of the granule """ - datasets = ['short_wavelength_displacement', 'connected_component_labels'] - granule_xr = open_opera_disp_granule(granule.s3_uri, datasets) - - same_ref_date = round_to_day(granule_xr.attrs['reference_date']) == round_to_day(frame.reference_date) - if not same_ref_date: - raise NotImplementedError('Granule reference date does not match metadata tile, this is not supported.') - bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map.shape, 120) # EPSG:3857 is in meters - granule_xr = granule_xr.rio.clip_box(*bbox, crs='EPSG:3857') - valid_data_mask = granule_xr['connected_component_labels'] > 0 - sw_cumul_disp_xr = granule_xr['short_wavelength_displacement'].where(valid_data_mask, np.nan) - sw_cumul_disp_xr = sw_cumul_disp_xr.rio.reproject('EPSG:3857', transform=geotransform, shape=frame_map.shape) + sw_cumul_disp_xr = load_sw_disp_granule(granule, bbox, frame) - frame_locations = frame_map == granule_xr.attrs['frame_id'] + sw_cumul_disp_xr = sw_cumul_disp_xr.rio.reproject('EPSG:3857', transform=geotransform, shape=frame_map.shape) + frame_locations = frame_map == sw_cumul_disp_xr.attrs['frame_id'] sw_cumul_disp[frame_locations] = sw_cumul_disp_xr.data[frame_locations].astype(float) - secondary_date = datetime.strftime(granule_xr.attrs['secondary_date'], DATE_FORMAT) + secondary_date = datetime.strftime(sw_cumul_disp_xr.attrs['secondary_date'], DATE_FORMAT) return sw_cumul_disp, secondary_date @@ -160,20 +185,21 @@ def create_sw_disp_tile(metadata_path: Path, begin_date: datetime, end_date: dat print(f'Generating tile {product_name}') frames = frames_from_metadata(metadata_path) - needed_granules = find_needed_granules(list(frames.keys()), begin_date, end_date) + needed_granules = find_needed_granules(list(frames.keys()), begin_date, end_date, strategy='max') frame_map, geotransform = get_raster_as_numpy(metadata_path) geotransform = Affine.from_gdal(*geotransform) sw_cumul_disp = np.full(frame_map.shape, np.nan, dtype=float) secondary_dates = {} - for granule in needed_granules: - print(f'Granule {granule.scene_name} selected for frame {granule.frame_id}.') - frame = frames[granule.frame_id] + for frame_id, granules in needed_granules.items(): + granule = granules[0] + print(f'Granule {granule.scene_name} selected for frame {frame_id}.') + frame = frames[frame_id] sw_cumul_disp, secondary_date = add_granule_data_to_array( granule, frame, frame_map, geotransform, sw_cumul_disp ) - secondary_dates[f'FRAME_{frame.frame_id}_SEC_TIME'] = secondary_date + secondary_dates[f'FRAME_{frame_id}_SEC_TIME'] = secondary_date gdal.Translate(str(product_path), str(metadata_path), outputType=gdal.GDT_Float32, format='GTiff') ds = gdal.Open(str(product_path), gdal.GA_Update) @@ -192,7 +218,7 @@ def create_sw_disp_tile(metadata_path: Path, begin_date: datetime, end_date: dat def main(): """CLI entry point Example: - generate_sw_disp_tile METADATA_ASCENDING_N41W125_N42W124.tif 20170901 20171231 + generate_sw_disp_tile METADATA_ASCENDING_N42W124.tif 20170901 20171231 """ parser = argparse.ArgumentParser(description='Create a short wavelength cumulative displacement tile') parser.add_argument('metadata_path', type=str, help='Path to the metadata tile') 
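
The `strategy` argument introduced above reduces to a simple per-frame selection rule over the granules in the date window. A minimal sketch of that rule, using a hypothetical `FakeGranule` stand-in (only `secondary_date` matters here):

```python
from dataclasses import dataclass
from datetime import datetime


@dataclass
class FakeGranule:  # hypothetical stand-in for the project's Granule type
    secondary_date: datetime


def select(granules: list, strategy: str) -> list:
    """Mirror of the selection logic in find_needed_granules."""
    if strategy == 'max':
        return [max(granules, key=lambda g: g.secondary_date)]  # most recent
    if strategy == 'minmax':
        return [min(granules, key=lambda g: g.secondary_date),
                max(granules, key=lambda g: g.secondary_date)]
    if strategy == 'all':
        return granules
    raise ValueError(f'Invalid strategy: {strategy}. Must be "max", "minmax", or "all".')


granules = [FakeGranule(datetime(2021, 1, d)) for d in (1, 5, 9)]
assert [g.secondary_date.day for g in select(granules, 'minmax')] == [1, 9]
```
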
diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index e69de29..db671ca 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -0,0 +1,79 @@
+import argparse
+from datetime import datetime
+from pathlib import Path
+
+import numpy as np
+from osgeo import gdal
+from rasterio.transform import Affine
+
+from opera_disp_tms import generate_sw_disp_tile as sw_disp
+from opera_disp_tms.utils import create_buffered_bbox, get_raster_as_numpy
+
+
+gdal.UseExceptions()
+
+
+def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array):
+    bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90)  # EPSG:3857 is in meters
+    # TODO: Multithread to speed up
+    granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules]
+    return
+
+
+def create_sw_vel_tile(metadata_path: Path, begin_date: datetime, end_date: datetime, minmax: bool = True):
+    if not metadata_path.exists():
+        raise FileNotFoundError(f'{metadata_path} does not exist')
+    if begin_date > end_date:
+        raise ValueError('Begin date must be before end date')
+
+    product_name = sw_disp.create_product_name(metadata_path.name, begin_date, end_date, 'SW_VELOCITY')
+    product_path = Path.cwd() / product_name
+    print(f'Generating tile {product_name}')
+
+    frames = sw_disp.frames_from_metadata(metadata_path)
+    strategy = 'minmax' if minmax else 'all'
+    needed_granules = sw_disp.find_needed_granules(list(frames.keys()), begin_date, end_date, strategy=strategy)
+
+    frame_map, geotransform = get_raster_as_numpy(metadata_path)
+    geotransform = Affine.from_gdal(*geotransform)
+    sw_vel = np.full(frame_map.shape, np.nan, dtype=float)
+    for frame_id, granules in needed_granules.items():
+        frame = frames[frame_id]
+        sw_vel = add_velocity_data_to_array(granules, frame, geotransform, frame_map, sw_vel)
+
+    gdal.Translate(str(product_path), str(metadata_path), outputType=gdal.GDT_Float32, format='GTiff')
+    ds = gdal.Open(str(product_path), gdal.GA_Update)
+    band = ds.GetRasterBand(1)
+    band.SetNoDataValue(np.nan)
+    band.WriteArray(sw_vel)
+    ds.SetMetadata(ds.GetMetadata())
+    ds.FlushCache()
+    ds = None
+    print('Done!')
+    return product_path
+
+
+def main():
+    """CLI entry point
+    Example:
+    generate_sw_vel_tile METADATA_ASCENDING_N42W124.tif 20160101 20190101
+    """
+    parser = argparse.ArgumentParser(description='Create a short wavelength displacement velocity tile')
+    parser.add_argument('metadata_path', type=str, help='Path to the metadata tile')
+    parser.add_argument(
+        'begin_date', type=str, help='Start of secondary date search range to generate tile for in format: %Y%m%d'
+    )
+    parser.add_argument(
+        'end_date', type=str, help='End of secondary date search range to generate tile for in format: %Y%m%d'
+    )
+
+    args = parser.parse_args()
+    args.metadata_path = Path(args.metadata_path)
+    args.begin_date = datetime.strptime(args.begin_date, '%Y%m%d')
+    args.end_date = datetime.strptime(args.end_date, '%Y%m%d')
+
+    create_sw_vel_tile(args.metadata_path, args.begin_date, args.end_date)
+
+
+if __name__ == '__main__':
+    main()
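
With the default `minmax=True`, `create_sw_vel_tile` requests only the first and last granules in the window, so the later least-squares fit degenerates to the secant between two epochs. The arithmetic, with made-up numbers:

```python
t0, t1 = 0.0, 2.5       # years since start
d0, d1 = 0.010, 0.060   # short-wavelength displacement in meters
velocity = (d1 - d0) / (t1 - t0)  # 0.02 m/yr; identical to a two-point line fit
```
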
From ed2b61093248b59233b21c56671fc17c99e26a87 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Thu, 17 Oct 2024 14:50:49 +0000
Subject: [PATCH 03/28] working velocity generation

---
 src/opera_disp_tms/generate_sw_vel_tile.py | 30 +++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index db671ca..727544b 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -3,6 +3,7 @@ from pathlib import Path
 
 import numpy as np
+import xarray as xr
 from osgeo import gdal
 from rasterio.transform import Affine
 
@@ -13,11 +14,34 @@ gdal.UseExceptions()
 
 
+def get_years_since_start(datetimes):
+    start = min(datetimes)
+    yrs_since_start = [(date - start).days / 365.25 for date in datetimes]
+    return yrs_since_start
+
+
 def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array):
     bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90)  # EPSG:3857 is in meters
     # TODO: Multithread to speed up
     granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules]
-    return
+    cube = xr.concat(granule_xrs, dim='years_since_start')
+    # ensures output units are m/yr
+    years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs])
+    cube = cube.assign_coords(years_since_start=years_since_start)
+    del cube.attrs['secondary_date']
+    non_nan_count = cube.count(dim='years_since_start')
+    cube = cube.where(non_nan_count >= 2, np.nan)
+    linear_fit = cube.polyfit(dim='years_since_start', deg=1)
+    # multiplying by 100 converts to cm/yr
+    velocity = xr.Dataset({'velocity': linear_fit.polyfit_coefficients.sel(degree=1) * 100, 'count': non_nan_count})
+    velocity.attrs = cube.attrs
+
+    velocity_reproj = velocity['velocity'].rio.reproject(
+        'EPSG:3857', transform=geotransform, shape=frame_map_array.shape
+    )
+    frame_locations = frame_map_array == velocity.attrs['frame_id']
+    out_array[frame_locations] = velocity_reproj.data[frame_locations].astype(float)
+    return out_array
 
 
 def create_sw_vel_tile(metadata_path: Path, begin_date: datetime, end_date: datetime, minmax: bool = True):
@@ -61,10 +85,10 @@ def main():
     parser = argparse.ArgumentParser(description='Create a short wavelength displacement velocity tile')
     parser.add_argument('metadata_path', type=str, help='Path to the metadata tile')
     parser.add_argument(
-        'begin_date', type=str, help='Start of secondary date search range to generate tile for in format: %Y%m%d'
+        'begin_date', type=str, help='Start of secondary date search range to generate tile for (e.g., 20211231)'
     )
     parser.add_argument(
-        'end_date', type=str, help='End of secondary date search range to generate tile for in format: %Y%m%d'
+        'end_date', type=str, help='End of secondary date search range to generate tile for (e.g., 20211231)'
     )
 
     args = parser.parse_args()
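
Because the time coordinate built by `get_years_since_start` is in years and the displacement data are in meters, the degree-1 coefficient from `polyfit` is already m/yr; the `* 100` then yields cm/yr. A self-contained check on a synthetic cube (dims and values are assumptions for illustration):

```python
import numpy as np
import xarray as xr

years = np.array([0.0, 1.0, 2.0])
data = np.stack([t * np.full((2, 2), 0.03) for t in years])  # 0.03 m/yr trend
cube = xr.DataArray(data, dims=('years_since_start', 'y', 'x'),
                    coords={'years_since_start': years})
fit = cube.polyfit(dim='years_since_start', deg=1)
vel_cm_yr = fit.polyfit_coefficients.sel(degree=1) * 100  # ~3.0 cm/yr everywhere
```
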
From aaa09a22411f4c4afe2989bac9d9f4e2724ca9b8 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Thu, 17 Oct 2024 15:04:18 +0000
Subject: [PATCH 04/28] dask first attempt

---
 environment.yml                            | 1 +
 pyproject.toml                             | 1 +
 src/opera_disp_tms/generate_sw_vel_tile.py | 8 ++++++++
 3 files changed, 10 insertions(+)

diff --git a/environment.yml b/environment.yml
index 163abc2..89cb944 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,5 +26,6 @@ dependencies:
   - h5netcdf
   - xarray
   - rioxarray
+  - dask
   - s3fs
   - cachetools

diff --git a/pyproject.toml b/pyproject.toml
index 8a5d15d..2846ea4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
     "h5netcdf",
     "rioxarray",
     "xarray",
+    "dask",
     "cachetools",
 ]
 dynamic = ["version", "readme"]

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index 727544b..57c39be 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -4,6 +4,7 @@
 
 import numpy as np
 import xarray as xr
+from dask.distributed import LocalCluster
 from osgeo import gdal
 from rasterio.transform import Affine
 
@@ -31,7 +32,14 @@ def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, o
     del cube.attrs['secondary_date']
     non_nan_count = cube.count(dim='years_since_start')
     cube = cube.where(non_nan_count >= 2, np.nan)
+
+    cluster = LocalCluster()
+    client = cluster.get_client()
+    cube = cube.chunk({'x': 500, 'y': 500, 'years_since_start': -1})
     linear_fit = cube.polyfit(dim='years_since_start', deg=1)
+    linear_fit = linear_fit.compute(client=client)
+    client.close()
+
     # multiplying by 100 converts to cm/yr
     velocity = xr.Dataset({'velocity': linear_fit.polyfit_coefficients.sel(degree=1) * 100, 'count': non_nan_count})
     velocity.attrs = cube.attrs

From 98b302a0363d1884b63d22931cb1c0c31d60a772 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Thu, 17 Oct 2024 21:34:54 +0000
Subject: [PATCH 05/28] add full option, remove dask

---
 environment.yml                            |  1 -
 pyproject.toml                             |  1 -
 src/opera_disp_tms/generate_sw_vel_tile.py | 18 ++++++++----------
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/environment.yml b/environment.yml
index 89cb944..163abc2 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,6 +26,5 @@ dependencies:
   - h5netcdf
   - xarray
   - rioxarray
-  - dask
   - s3fs
   - cachetools

diff --git a/pyproject.toml b/pyproject.toml
index 2846ea4..8a5d15d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,7 +31,6 @@ dependencies = [
     "h5netcdf",
     "rioxarray",
     "xarray",
-    "dask",
     "cachetools",
 ]
 dynamic = ["version", "readme"]

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index 57c39be..12d10cc 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -4,7 +4,6 @@
 
 import numpy as np
 import xarray as xr
-from dask.distributed import LocalCluster
 from osgeo import gdal
 from rasterio.transform import Affine
 
@@ -23,22 +22,16 @@ def get_years_since_start(datetimes):
 
 def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array):
     bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90)  # EPSG:3857 is in meters
-    # TODO: Multithread to speed up
+
     granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules]
     cube = xr.concat(granule_xrs, dim='years_since_start')
     # ensures output units are m/yr
     years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs])
     cube = cube.assign_coords(years_since_start=years_since_start)
-    del cube.attrs['secondary_date']
+
     non_nan_count = cube.count(dim='years_since_start')
     cube = cube.where(non_nan_count >= 2, np.nan)
-
-    cluster = LocalCluster()
-    client = cluster.get_client()
-    cube = cube.chunk({'x': 500, 'y': 500, 'years_since_start': -1})
     linear_fit = cube.polyfit(dim='years_since_start', deg=1)
-    linear_fit = linear_fit.compute(client=client)
-    client.close()
 
     # multiplying by 100 converts to cm/yr
     velocity = xr.Dataset({'velocity': linear_fit.polyfit_coefficients.sel(degree=1) * 100, 'count': non_nan_count})
     velocity.attrs = cube.attrs
@@ -65,6 +58,8 @@ def create_sw_vel_tile(metadata_path: Path, begin_date: datetime, end_date: date
     frames = sw_disp.frames_from_metadata(metadata_path)
     strategy = 'minmax' if minmax else 'all'
     needed_granules = sw_disp.find_needed_granules(list(frames.keys()), begin_date, end_date, strategy=strategy)
+    len_str = [f' {frame_id}: 
{len(needed_granules[frame_id])}' for frame_id in needed_granules] + print('\n'.join(['N granules:'] + len_str)) frame_map, geotransform = get_raster_as_numpy(metadata_path) geotransform = Affine.from_gdal(*geotransform) @@ -98,13 +93,16 @@ def main(): parser.add_argument( 'end_date', type=str, help='End of secondary date search range to generate tile for (e.g., 20211231)' ) + parser.add_argument( + '--full', action='store_false', help='Use a full linear fit instead of a secant fit (first/last date only)' + ) args = parser.parse_args() args.metadata_path = Path(args.metadata_path) args.begin_date = datetime.strptime(args.begin_date, '%Y%m%d') args.end_date = datetime.strptime(args.end_date, '%Y%m%d') - create_sw_vel_tile(args.metadata_path, args.begin_date, args.end_date) + create_sw_vel_tile(args.metadata_path, args.begin_date, args.end_date, minmax=args.full) if __name__ == '__main__': From 7740e194d4e3dd8ec7a493ab454948900d3877bb Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 18 Oct 2024 18:21:08 +0000 Subject: [PATCH 06/28] working on numba --- src/opera_disp_tms/generate_sw_vel_tile.py | 121 ++++++++++++++++++++- tests/test_generate_sw_vel_tile.py | 31 ++++++ 2 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 tests/test_generate_sw_vel_tile.py diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py index 12d10cc..0c76570 100644 --- a/src/opera_disp_tms/generate_sw_vel_tile.py +++ b/src/opera_disp_tms/generate_sw_vel_tile.py @@ -2,6 +2,7 @@ from datetime import datetime from pathlib import Path +import numba import numpy as np import xarray as xr from osgeo import gdal @@ -20,23 +21,131 @@ def get_years_since_start(datetimes): return yrs_since_start +# def linear_regression_np(data, x): +# n, m, p = data.shape +# slope = np.zeros((m, p)) +# intercept = np.zeros((m, p)) +# +# X = np.vstack([x, np.ones_like(x)]).T +# +# for i in range(m): +# for j in range(p): +# y = data[:, i, j] +# slope[i, j], intercept[i, j] = np.linalg.lstsq(X, y, rcond=None)[0] +# +# return slope, intercept +# +# +# @numba.njit +# def linear_regression_leastsquares_bad(X, y): +# n = len(X) +# X_mean = np.mean(X) +# y_mean = np.mean(y) +# +# numerator = 0.0 +# denominator = 0.0 +# +# for i in range(n): +# numerator += (X[i] - X_mean) * (y[i] - y_mean) +# denominator += (X[i] - X_mean) ** 2 +# +# if denominator == 0: +# return np.nan, np.nan +# +# slope = numerator / denominator +# intercept = y_mean - slope * X_mean +# return slope, intercept + + +# @numba.njit +# def linear_regression_leastsquares(X, y): +# if np.all(np.isnan(X)): +# return np.nan, np.nan +# +# if np.all(np.isclose(y, y[0], atol=1e-6)): +# return 0.0, y[0] +# +# X_b = np.zeros((X.shape[0], 2), dtype=X.dtype) +# X_b[:, 0] = X +# X_b[:, 1] = np.ones_like(X) +# +# step1 = np.dot(X_b.T, X_b) +# step2 = np.linalg.inv(step1) +# step3 = np.dot(step2, X_b.T) +# theta = np.dot(step3, y) +# slope, intercept = theta[0], theta[1] +# return slope, intercept + + +# @numba.njit +# def linear_regression_leastsquares(X, y): +# if np.all(np.isnan(X)): +# return np.nan, np.nan +# +# if np.all(np.isclose(y, y[0], atol=1e-6)): +# return 0.0, y[0] +# +# X_b = np.zeros((X.shape[0], 2), dtype=X.dtype) +# X_b[:, 0] = X +# X_b[:, 1] = np.ones_like(X) +# slope, intercept = np.linalg.lstsq(X_b, y, rcond=None)[0] +# return slope, intercept + + +@numba.njit +def linear_regression_leastsquares(x, y): + """From: https://www4.stat.ncsu.edu/%7Edickey/courses/st512/pdf_notes/Formulas.pdf""" + if 
np.all(np.isnan(x)): + return np.nan, np.nan + + n = len(x) + x_mean = np.mean(x) + y_mean = np.mean(y) + x_square_sum = np.sum(x**2) + sum_cross = np.sum(x * y) + x_correction = x_square_sum - (n * x_mean**2) + if np.isclose(x_correction, 0.0, atol=1e-6): + return np.nan, np.nan + xy_correction = sum_cross - (n * x_mean * y_mean) + slope = xy_correction / x_correction + intercept = y_mean - (slope * x_mean) + return slope, intercept + + +@numba.njit(parallel=True) +def parallel_linear_regression(X, y): + n, m, p = X.shape + slope = np.zeros((m, p)) + intercept = np.zeros((m, p)) + for i in numba.prange(m): + for j in numba.prange(p): + slope[i, j], intercept[i, j] = linear_regression_leastsquares(X[:, i, j].copy(), y) + return slope, intercept + + def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array): bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90) # EPSG:3857 is in meters - granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules] cube = xr.concat(granule_xrs, dim='years_since_start') - # ensures output units are m/yr + years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs]) cube = cube.assign_coords(years_since_start=years_since_start) - non_nan_count = cube.count(dim='years_since_start') cube = cube.where(non_nan_count >= 2, np.nan) - linear_fit = cube.polyfit(dim='years_since_start', deg=1) - # multiplying by 100 converts to cm/yr - velocity = xr.Dataset({'velocity': linear_fit.polyfit_coefficients.sel(degree=1) * 100, 'count': non_nan_count}) + cube_array = cube.data * 1_000_000 # convert to um + slope, intercept = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float64')) + new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref} + slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords) + velocity = xr.Dataset({'velocity': slope_da * 10_000, 'count': non_nan_count}, new_coords) velocity.attrs = cube.attrs + # cube *= 1_000_000 # convert to um/yr + # linear_fit = cube.polyfit(dim='years_since_start', deg=1) + # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) / 10_000 # convert to cm/yr + # velocity = xr.Dataset({'velocity': slope_da, 'count': non_nan_count}) + # velocity.attrs = cube.attrs + velocity_reproj = velocity['velocity'].rio.reproject( 'EPSG:3857', transform=geotransform, shape=frame_map_array.shape ) diff --git a/tests/test_generate_sw_vel_tile.py b/tests/test_generate_sw_vel_tile.py new file mode 100644 index 0000000..cc4a6aa --- /dev/null +++ b/tests/test_generate_sw_vel_tile.py @@ -0,0 +1,31 @@ +import numpy as np + +from opera_disp_tms import generate_sw_vel_tile as sw_vel + + +def test_linear_regression_leastsquares(): + x = np.arange(10, dtype='float32') + y = np.arange(10, dtype='float32') + slope, intercept = sw_vel.linear_regression_leastsquares(x, y) + assert np.isclose(slope, 1.0, atol=1e-6) + assert np.isclose(intercept, 0.0, atol=1e-6) + + slope, intercept = sw_vel.linear_regression_leastsquares(x * -1, y + 2) + assert np.isclose(slope, -1.0, atol=1e-6) + assert np.isclose(intercept, 2.0, atol=1e-6) + + x_1 = np.ones(10, dtype='float32') + slope, intercept = sw_vel.linear_regression_leastsquares(x_1, y) + + +def test_parallel_linear_regression(): + x_col = np.arange(3, dtype='float32') + x = np.ones((3, 5, 5), dtype='float32') * -1 + indexes = np.arange(5) + for i in indexes: + for j in indexes: + x[:, i, j] = x_col + y = np.arange(3, dtype='float32') + slope, intercept = 
sw_vel.parallel_linear_regression(x, y) + assert np.all(np.isclose(slope, 1.0, atol=1e-6)) + assert np.all(np.isclose(intercept, 0.0, atol=1e-6)) From 53eeeecf843f974fb2ca94c2950fd1458a5adba7 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 18 Oct 2024 21:28:25 +0000 Subject: [PATCH 07/28] more numba work --- src/opera_disp_tms/generate_sw_vel_tile.py | 97 +++------------------- tests/test_generate_sw_vel_tile.py | 12 ++- 2 files changed, 17 insertions(+), 92 deletions(-) diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py index 0c76570..cd0d711 100644 --- a/src/opera_disp_tms/generate_sw_vel_tile.py +++ b/src/opera_disp_tms/generate_sw_vel_tile.py @@ -2,9 +2,9 @@ from datetime import datetime from pathlib import Path -import numba import numpy as np import xarray as xr +from numba import float32, njit, prange from osgeo import gdal from rasterio.transform import Affine @@ -21,82 +21,11 @@ def get_years_since_start(datetimes): return yrs_since_start -# def linear_regression_np(data, x): -# n, m, p = data.shape -# slope = np.zeros((m, p)) -# intercept = np.zeros((m, p)) -# -# X = np.vstack([x, np.ones_like(x)]).T -# -# for i in range(m): -# for j in range(p): -# y = data[:, i, j] -# slope[i, j], intercept[i, j] = np.linalg.lstsq(X, y, rcond=None)[0] -# -# return slope, intercept -# -# -# @numba.njit -# def linear_regression_leastsquares_bad(X, y): -# n = len(X) -# X_mean = np.mean(X) -# y_mean = np.mean(y) -# -# numerator = 0.0 -# denominator = 0.0 -# -# for i in range(n): -# numerator += (X[i] - X_mean) * (y[i] - y_mean) -# denominator += (X[i] - X_mean) ** 2 -# -# if denominator == 0: -# return np.nan, np.nan -# -# slope = numerator / denominator -# intercept = y_mean - slope * X_mean -# return slope, intercept - - -# @numba.njit -# def linear_regression_leastsquares(X, y): -# if np.all(np.isnan(X)): -# return np.nan, np.nan -# -# if np.all(np.isclose(y, y[0], atol=1e-6)): -# return 0.0, y[0] -# -# X_b = np.zeros((X.shape[0], 2), dtype=X.dtype) -# X_b[:, 0] = X -# X_b[:, 1] = np.ones_like(X) -# -# step1 = np.dot(X_b.T, X_b) -# step2 = np.linalg.inv(step1) -# step3 = np.dot(step2, X_b.T) -# theta = np.dot(step3, y) -# slope, intercept = theta[0], theta[1] -# return slope, intercept - - -# @numba.njit -# def linear_regression_leastsquares(X, y): -# if np.all(np.isnan(X)): -# return np.nan, np.nan -# -# if np.all(np.isclose(y, y[0], atol=1e-6)): -# return 0.0, y[0] -# -# X_b = np.zeros((X.shape[0], 2), dtype=X.dtype) -# X_b[:, 0] = X -# X_b[:, 1] = np.ones_like(X) -# slope, intercept = np.linalg.lstsq(X_b, y, rcond=None)[0] -# return slope, intercept - - -@numba.njit +@njit#(float32[::1](float32[::1], float32[::1]), cache=True) def linear_regression_leastsquares(x, y): """From: https://www4.stat.ncsu.edu/%7Edickey/courses/st512/pdf_notes/Formulas.pdf""" - if np.all(np.isnan(x)): - return np.nan, np.nan + if np.all(np.isnan(x)) or np.all(np.isclose(x, x[0], atol=1e-6)): + return np.nan n = len(x) x_mean = np.mean(x) @@ -105,22 +34,20 @@ def linear_regression_leastsquares(x, y): sum_cross = np.sum(x * y) x_correction = x_square_sum - (n * x_mean**2) if np.isclose(x_correction, 0.0, atol=1e-6): - return np.nan, np.nan + return np.nan xy_correction = sum_cross - (n * x_mean * y_mean) slope = xy_correction / x_correction - intercept = y_mean - (slope * x_mean) - return slope, intercept + return slope -@numba.njit(parallel=True) +@njit(parallel=True) def parallel_linear_regression(X, y): n, m, p = X.shape slope = np.zeros((m, p)) - 
intercept = np.zeros((m, p)) - for i in numba.prange(m): - for j in numba.prange(p): - slope[i, j], intercept[i, j] = linear_regression_leastsquares(X[:, i, j].copy(), y) - return slope, intercept + for i in prange(m): + for j in prange(p): + slope[i, j] = linear_regression_leastsquares(X[:, i, j].copy(), y) + return slope def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array): @@ -134,7 +61,7 @@ def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, o cube = cube.where(non_nan_count >= 2, np.nan) cube_array = cube.data * 1_000_000 # convert to um - slope, intercept = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float64')) + slope, intercept = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float32')) new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref} slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords) velocity = xr.Dataset({'velocity': slope_da * 10_000, 'count': non_nan_count}, new_coords) diff --git a/tests/test_generate_sw_vel_tile.py b/tests/test_generate_sw_vel_tile.py index cc4a6aa..24d38e2 100644 --- a/tests/test_generate_sw_vel_tile.py +++ b/tests/test_generate_sw_vel_tile.py @@ -6,16 +6,15 @@ def test_linear_regression_leastsquares(): x = np.arange(10, dtype='float32') y = np.arange(10, dtype='float32') - slope, intercept = sw_vel.linear_regression_leastsquares(x, y) + slope = sw_vel.linear_regression_leastsquares(x, y) assert np.isclose(slope, 1.0, atol=1e-6) - assert np.isclose(intercept, 0.0, atol=1e-6) - slope, intercept = sw_vel.linear_regression_leastsquares(x * -1, y + 2) + slope = sw_vel.linear_regression_leastsquares(x * -1, y + 2) assert np.isclose(slope, -1.0, atol=1e-6) - assert np.isclose(intercept, 2.0, atol=1e-6) x_1 = np.ones(10, dtype='float32') - slope, intercept = sw_vel.linear_regression_leastsquares(x_1, y) + slope = sw_vel.linear_regression_leastsquares(x_1, y) + assert np.isnan(slope) def test_parallel_linear_regression(): @@ -26,6 +25,5 @@ def test_parallel_linear_regression(): for j in indexes: x[:, i, j] = x_col y = np.arange(3, dtype='float32') - slope, intercept = sw_vel.parallel_linear_regression(x, y) + slope = sw_vel.parallel_linear_regression(x, y) assert np.all(np.isclose(slope, 1.0, atol=1e-6)) - assert np.all(np.isclose(intercept, 0.0, atol=1e-6)) From 8c8d0ffb444f953ec3a92b454392acfa7639bdd0 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 18 Oct 2024 21:36:27 +0000 Subject: [PATCH 08/28] kind of working --- src/opera_disp_tms/generate_sw_vel_tile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py index cd0d711..e9c6331 100644 --- a/src/opera_disp_tms/generate_sw_vel_tile.py +++ b/src/opera_disp_tms/generate_sw_vel_tile.py @@ -21,7 +21,7 @@ def get_years_since_start(datetimes): return yrs_since_start -@njit#(float32[::1](float32[::1], float32[::1]), cache=True) +@njit#(float32(float32[::1], float32[::1]), cache=True) def linear_regression_leastsquares(x, y): """From: https://www4.stat.ncsu.edu/%7Edickey/courses/st512/pdf_notes/Formulas.pdf""" if np.all(np.isnan(x)) or np.all(np.isclose(x, x[0], atol=1e-6)): @@ -60,11 +60,11 @@ def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, o non_nan_count = cube.count(dim='years_since_start') cube = cube.where(non_nan_count >= 2, np.nan) - cube_array = cube.data * 1_000_000 # convert to um - slope, intercept 
= parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float32')) + cube_array = (cube.data * 1_000_000).astype('float32') # convert to um + slope = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float32')) new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref} slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords) - velocity = xr.Dataset({'velocity': slope_da * 10_000, 'count': non_nan_count}, new_coords) + velocity = xr.Dataset({'velocity': slope_da / 10_000, 'count': non_nan_count}, new_coords) velocity.attrs = cube.attrs # cube *= 1_000_000 # convert to um/yr From 9f6a031b68c4e72981037cfbb25348f3911cd86c Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Fri, 18 Oct 2024 21:41:44 +0000 Subject: [PATCH 09/28] little better --- src/opera_disp_tms/generate_sw_vel_tile.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py index e9c6331..7c4cc8a 100644 --- a/src/opera_disp_tms/generate_sw_vel_tile.py +++ b/src/opera_disp_tms/generate_sw_vel_tile.py @@ -21,7 +21,7 @@ def get_years_since_start(datetimes): return yrs_since_start -@njit#(float32(float32[::1], float32[::1]), cache=True) +@njit # (float32(float32[::1], float32[::1]), cache=True) def linear_regression_leastsquares(x, y): """From: https://www4.stat.ncsu.edu/%7Edickey/courses/st512/pdf_notes/Formulas.pdf""" if np.all(np.isnan(x)) or np.all(np.isclose(x, x[0], atol=1e-6)): @@ -60,11 +60,11 @@ def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, o non_nan_count = cube.count(dim='years_since_start') cube = cube.where(non_nan_count >= 2, np.nan) - cube_array = (cube.data * 1_000_000).astype('float32') # convert to um + cube_array = cube.data.astype('float32') # convert to um slope = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float32')) new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref} slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords) - velocity = xr.Dataset({'velocity': slope_da / 10_000, 'count': non_nan_count}, new_coords) + velocity = xr.Dataset({'velocity': slope_da * 100, 'count': non_nan_count}, new_coords) velocity.attrs = cube.attrs # cube *= 1_000_000 # convert to um/yr From 40861b963c68bbc8b231afbffa796993ef9b6b07 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:04:23 +0000 Subject: [PATCH 10/28] Bump ASFHyP3/actions from 0.11.2 to 0.12.0 Bumps [ASFHyP3/actions](https://github.com/asfhyp3/actions) from 0.11.2 to 0.12.0. - [Release notes](https://github.com/asfhyp3/actions/releases) - [Changelog](https://github.com/ASFHyP3/actions/blob/develop/CHANGELOG.md) - [Commits](https://github.com/asfhyp3/actions/compare/v0.11.2...v0.12.0) --- updated-dependencies: - dependency-name: ASFHyP3/actions dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- .github/workflows/changelog.yml | 2 +- .github/workflows/labeled-pr.yml | 2 +- .github/workflows/release-checklist-comment.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/static-analysis.yml | 4 ++-- .github/workflows/tag-version.yml | 2 +- .github/workflows/test.yml | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 1116782..febc8cd 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -14,4 +14,4 @@ on: jobs: call-changelog-check-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-changelog-check.yml@v0.12.0 diff --git a/.github/workflows/labeled-pr.yml b/.github/workflows/labeled-pr.yml index 50e66b7..dde2ec5 100644 --- a/.github/workflows/labeled-pr.yml +++ b/.github/workflows/labeled-pr.yml @@ -13,4 +13,4 @@ on: jobs: call-labeled-pr-check-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-labeled-pr-check.yml@v0.12.0 diff --git a/.github/workflows/release-checklist-comment.yml b/.github/workflows/release-checklist-comment.yml index 26d10fd..eab5714 100644 --- a/.github/workflows/release-checklist-comment.yml +++ b/.github/workflows/release-checklist-comment.yml @@ -10,7 +10,7 @@ on: jobs: call-release-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-release-checklist-comment.yml@v0.12.0 permissions: pull-requests: write secrets: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8a461f6..05a7511 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,7 +8,7 @@ on: jobs: call-release-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-release.yml@v0.12.0 with: release_prefix: OPERA-DISP-TMS release_branch: main diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml index cdee5d7..8d6d461 100644 --- a/.github/workflows/static-analysis.yml +++ b/.github/workflows/static-analysis.yml @@ -5,10 +5,10 @@ on: push jobs: call-secrets-analysis-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-secrets-analysis.yml@v0.12.0 call-flake8-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-flake8.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-flake8.yml@v0.12.0 with: local_package_names: opera_disp_tms diff --git a/.github/workflows/tag-version.yml b/.github/workflows/tag-version.yml index da2a91c..4c4ddea 100644 --- a/.github/workflows/tag-version.yml +++ b/.github/workflows/tag-version.yml @@ -8,7 +8,7 @@ on: jobs: call-bump-version-workflow: # Docs: https://github.com/ASFHyP3/actions - uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.11.2 + uses: ASFHyP3/actions/.github/workflows/reusable-bump-version.yml@v0.12.0 with: user: tools-bot 
email: UAF-asf-apd@alaska.edu

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 961e2f5..3bfd784 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -13,7 +13,7 @@ on:
 jobs:
   call-pytest-workflow:
     # Docs: https://github.com/ASFHyP3/actions
-    uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.11.2
+    uses: ASFHyP3/actions/.github/workflows/reusable-pytest.yml@v0.12.0
     with:
       local_package_name: opera_disp_tms
       python_versions: >-

From 0c294810f050b3260588bd855b7223317c3814df Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 14:21:50 +0000
Subject: [PATCH 11/28] working velocity

---
 src/opera_disp_tms/generate_sw_vel_tile.py | 96 +++++++++++++---------
 tests/test_generate_sw_vel_tile.py         | 33 ++++++--
 2 files changed, 81 insertions(+), 48 deletions(-)

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index 7c4cc8a..f1065f5 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -1,10 +1,11 @@
 import argparse
 from datetime import datetime
 from pathlib import Path
+from typing import List
 
 import numpy as np
 import xarray as xr
-from numba import float32, njit, prange
+from numba import njit, prange
 from osgeo import gdal
 from rasterio.transform import Affine
 
@@ -15,63 +16,80 @@ gdal.UseExceptions()
 
 
-def get_years_since_start(datetimes):
+def get_years_since_start(datetimes: List[datetime]) -> np.ndarray:
+    """Get the number of years since the earliest date as a list of floats"""
     start = min(datetimes)
-    yrs_since_start = [(date - start).days / 365.25 for date in datetimes]
+    yrs_since_start = np.array([(date - start).days / 365.25 for date in datetimes])
     return yrs_since_start
 
 
-@njit  # (float32(float32[::1], float32[::1]), cache=True)
-def linear_regression_leastsquares(x, y):
-    """From: https://www4.stat.ncsu.edu/%7Edickey/courses/st512/pdf_notes/Formulas.pdf"""
-    if np.all(np.isnan(x)) or np.all(np.isclose(x, x[0], atol=1e-6)):
-        return np.nan
-
-    n = len(x)
-    x_mean = np.mean(x)
-    y_mean = np.mean(y)
-    x_square_sum = np.sum(x**2)
-    sum_cross = np.sum(x * y)
-    x_correction = x_square_sum - (n * x_mean**2)
-    if np.isclose(x_correction, 0.0, atol=1e-6):
-        return np.nan
-    xy_correction = sum_cross - (n * x_mean * y_mean)
-    slope = xy_correction / x_correction
-    return slope
+@njit(cache=True)
+def linear_regression_leastsquares(x: np.ndarray, y: np.ndarray) -> tuple:
+    """Based on the scipy.stats.linregress implementation:
+    https://github.com/scipy/scipy/blob/v1.14.1/scipy/stats/_stats_py.py#L10752-L10947
+
+    Args:
+        x: The independent variable as a 1D numpy array
+        y: The dependent variable as a 1D numpy array
+
+    Returns:
+        tuple: The slope and intercept of the linear regression
+    """
+    non_nan_indices = np.where(~np.isnan(x))
+    x = x[non_nan_indices]
+    y = y[non_nan_indices]
+
+    if len(x) < 2:
+        return np.nan, np.nan
+
+    if np.amax(x) == np.amin(x) and len(x) > 1:
+        return np.nan, np.nan
+
+    xmean = np.mean(x)
+    ymean = np.mean(y)
+    ssxm, ssxym, _, ssym = np.cov(x, y, bias=1).flat
+    slope = ssxym / ssxm
+    intercept = ymean - slope * xmean
+    return slope, intercept
 
 
-@njit(parallel=True)
-def parallel_linear_regression(X, y):
-    n, m, p = X.shape
-    slope = np.zeros((m, p))
-    for i in prange(m):
-        for j in prange(p):
-            slope[i, j] = linear_regression_leastsquares(X[:, i, j].copy(), y)
-    return slope
+@njit(parallel=True, cache=True)
+def parallel_linear_regression(x: np.ndarray, y: np.ndarray) -> np.ndarray:
+    """Run linear regressions in parallel for each pixel in the x array
+
+    Args:
+        x: A 3D array of independent 
variables with dimensions (time, y, x) + y: A 1D array of dependent variables (i.e., time steps) + """ + n, m, p = x.shape + slope_array = np.zeros((m, p)) for i in prange(m): for j in prange(p): - slope[i, j] = linear_regression_leastsquares(X[:, i, j].copy(), y) - return slope + slope, intercept = linear_regression_leastsquares(x[:, i, j].copy(), y) + slope_array[i, j] = slope + return slope_array def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array): bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90) # EPSG:3857 is in meters granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules] cube = xr.concat(granule_xrs, dim='years_since_start') + print('running regression...') years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs]) cube = cube.assign_coords(years_since_start=years_since_start) - non_nan_count = cube.count(dim='years_since_start') - cube = cube.where(non_nan_count >= 2, np.nan) - - cube_array = cube.data.astype('float32') # convert to um - slope = parallel_linear_regression(cube_array, cube.years_since_start.data.astype('float32')) new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref} + + slope = parallel_linear_regression(cube.data.astype('float64'), cube.years_since_start.data.astype('float64')) slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords) - velocity = xr.Dataset({'velocity': slope_da * 100, 'count': non_nan_count}, new_coords) - velocity.attrs = cube.attrs - # cube *= 1_000_000 # convert to um/yr + # non_nan_count = cube.count(dim='years_since_start') + # cube = cube.where(non_nan_count >= 2, np.nan) # linear_fit = cube.polyfit(dim='years_since_start', deg=1) - # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) / 10_000 # convert to cm/yr - # velocity = xr.Dataset({'velocity': slope_da, 'count': non_nan_count}) - # velocity.attrs = cube.attrs + # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) * 100 + + velocity = xr.Dataset({'velocity': slope_da}, new_coords) + velocity.attrs = cube.attrs velocity_reproj = velocity['velocity'].rio.reproject( 'EPSG:3857', transform=geotransform, shape=frame_map_array.shape diff --git a/tests/test_generate_sw_vel_tile.py b/tests/test_generate_sw_vel_tile.py index 24d38e2..0cbf56f 100644 --- a/tests/test_generate_sw_vel_tile.py +++ b/tests/test_generate_sw_vel_tile.py @@ -4,26 +4,41 @@ def test_linear_regression_leastsquares(): - x = np.arange(10, dtype='float32') - y = np.arange(10, dtype='float32') - slope = sw_vel.linear_regression_leastsquares(x, y) + x = np.arange(10, dtype='float64') + y = np.arange(10, dtype='float64') + slope, intercept = sw_vel.linear_regression_leastsquares(x, y) assert np.isclose(slope, 1.0, atol=1e-6) + assert np.isclose(intercept, 0.0, atol=1e-6) - slope = sw_vel.linear_regression_leastsquares(x * -1, y + 2) + slope, intercept = sw_vel.linear_regression_leastsquares(x * -1, y + 2) assert np.isclose(slope, -1.0, atol=1e-6) + assert np.isclose(intercept, 2.0, atol=1e-6) - x_1 = np.ones(10, dtype='float32') - slope = sw_vel.linear_regression_leastsquares(x_1, y) + slope, intercept = sw_vel.linear_regression_leastsquares(x[0:2] * 1e-6, y[0:2] * 1e-6) + assert np.isclose(slope, 1.0, atol=1e-6) + assert np.isclose(intercept, 0.0, atol=1e-6) + + x_1 = np.ones(10, dtype='float64') + slope, intercept = sw_vel.linear_regression_leastsquares(x_1, y) assert np.isnan(slope) + assert np.isnan(intercept) + + x_nan = x.copy() + x_nan[4] = np.nan + slope, 
intercept = sw_vel.linear_regression_leastsquares(x_nan, y)
+    assert np.isclose(slope, 1.0, atol=1e-6)
+    assert np.isclose(intercept, 0.0, atol=1e-6)
+
+
 def test_parallel_linear_regression():
-    x_col = np.arange(3, dtype='float32')
-    x = np.ones((3, 5, 5), dtype='float32') * -1
+    x_col = np.arange(3, dtype='float64')
+    x = np.ones((3, 5, 5), dtype='float64') * -1
     indexes = np.arange(5)
     for i in indexes:
         for j in indexes:
             x[:, i, j] = x_col
-    y = np.arange(3, dtype='float32')
+    y = np.arange(3, dtype='float64')
     slope = sw_vel.parallel_linear_regression(x, y)
     assert np.all(np.isclose(slope, 1.0, atol=1e-6))

From e3c59308f926d4704ec45b1d62dfb8fd7977d903 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 14:36:20 +0000
Subject: [PATCH 12/28] cleanup and fix tests

---
 src/opera_disp_tms/generate_sw_vel_tile.py | 35 +++++++++++++++++++---
 tests/test_generate_sw_disp_tile.py        | 15 +++++-----
 tests/test_generate_sw_vel_tile.py         | 16 ++++++++--
 3 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index f1065f5..24fad10 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -1,7 +1,7 @@
 import argparse
 from datetime import datetime
 from pathlib import Path
-from typing import List
+from typing import Iterable, List
 
 import numpy as np
 import xarray as xr
@@ -10,6 +10,7 @@
 from rasterio.transform import Affine
 
 from opera_disp_tms import generate_sw_disp_tile as sw_disp
+from opera_disp_tms.find_california_dataset import Granule
 from opera_disp_tms.utils import create_buffered_bbox, get_raster_as_numpy
 
@@ -17,7 +18,15 @@ def get_years_since_start(datetimes: List[datetime]) -> np.ndarray:
-    """Get the number of years since the earliest date as a list of floats"""
+    """Get the number of years since the earliest date as an array of floats.
+    Order of the datetimes is preserved.
+
+    Args:
+        datetimes: A list of datetime objects
+
+    Returns:
+        np.ndarray: The number of years since the earliest date as an array of floats
+    """
     start = min(datetimes)
     yrs_since_start = np.array([(date - start).days / 365.25 for date in datetimes])
     return yrs_since_start
@@ -70,11 +79,28 @@ def parallel_linear_regression(x: np.ndarray, y: np.ndarray) -> np.ndarray:
     return slope_array
 
 
-def add_velocity_data_to_array(granules, frame, geotransform, frame_map_array, out_array):
+def add_velocity_data_to_array(
+    granules: Iterable[Granule],
+    frame: sw_disp.FrameMeta,
+    geotransform: Affine,
+    frame_map_array: np.ndarray,
+    out_array: np.ndarray,
+) -> np.ndarray:
+    """Create and add velocity data to an array using granules sourced from a single frame.
+
+    Args:
+        granules: A list of granule objects
+        frame: The frame metadata
+        geotransform: The geotransform of the frame
+        frame_map_array: The frame map as a numpy array
+        out_array: The array to add the velocity data to
+
+    Returns:
+        np.ndarray: The updated array
+    """
     bbox = create_buffered_bbox(geotransform.to_gdal(), frame_map_array.shape, 90)  # EPSG:3857 is in meters
     granule_xrs = [sw_disp.load_sw_disp_granule(x, bbox, frame) for x in granules]
     cube = xr.concat(granule_xrs, dim='years_since_start')
-    print('running regression...')
 
     years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs])
     cube = cube.assign_coords(years_since_start=years_since_start)
     new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref}
 
     slope = parallel_linear_regression(cube.data.astype('float64'), cube.years_since_start.data.astype('float64'))
     slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords)
 
+    # Non-numba version is 4x slower
     # non_nan_count = cube.count(dim='years_since_start')
     # cube = cube.where(non_nan_count >= 2, np.nan)
     # linear_fit = cube.polyfit(dim='years_since_start', deg=1)
     # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) * 100
 
     velocity = xr.Dataset({'velocity': slope_da}, new_coords)
     velocity.attrs = cube.attrs

diff --git a/tests/test_generate_sw_disp_tile.py b/tests/test_generate_sw_disp_tile.py
index afeeaf2..545eddf 100644
--- a/tests/test_generate_sw_disp_tile.py
+++ b/tests/test_generate_sw_disp_tile.py
@@ -60,17 +60,18 @@ def test_find_needed_granules():
         GranuleStub(frame_id=2, secondary_date=datetime(2021, 1, 2)),
     ]
     with mock.patch('opera_disp_tms.generate_sw_disp_tile.find_california_dataset', return_value=granules):
-        needed_granules = sw.find_needed_granules([1], datetime(2021, 1, 1), datetime(2021, 1, 2))
+        needed_granules = sw.find_needed_granules([1], datetime(2021, 1, 1), datetime(2021, 1, 3), strategy='max')
 
-    assert len(needed_granules) == 1
-    assert needed_granules[0] == granules[1]
+    assert list(needed_granules.keys()) == [1]
+    assert len(needed_granules[1]) == 1
+    assert needed_granules[1] == [granules[1]]
 
     with mock.patch('opera_disp_tms.generate_sw_disp_tile.find_california_dataset', return_value=granules):
-        needed_granules = sw.find_needed_granules([1, 2], datetime(2021, 1, 1), datetime(2021, 1, 2))
+        needed_granules = sw.find_needed_granules([1, 2], datetime(2021, 1, 1), datetime(2021, 1, 2), strategy='max')
 
-    assert len(needed_granules) == 2
-    assert needed_granules[0] == granules[1]
-    assert needed_granules[1] == granules[3]
+    assert list(needed_granules.keys()) == [1, 2]
+    assert needed_granules[1] == [granules[1]]
+    assert needed_granules[2] == [granules[3]]
 
 
 def test_create_product_name():

diff --git a/tests/test_generate_sw_vel_tile.py b/tests/test_generate_sw_vel_tile.py
index 0cbf56f..e67b99a 100644
--- a/tests/test_generate_sw_vel_tile.py
+++ b/tests/test_generate_sw_vel_tile.py
@@ -1,8 +1,22 @@
+from datetime import datetime
+
 import numpy as np
 
 from opera_disp_tms import generate_sw_vel_tile as sw_vel
 
 
+def test_get_years_since_start():
+    datetimes = [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)]
+    assert isinstance(sw_vel.get_years_since_start(datetimes), np.ndarray)
+    assert np.allclose(sw_vel.get_years_since_start(datetimes), [0.0, 1 / 365.25, 2 / 365.25])
+
+    datetimes = [datetime(2020, 1, 1), datetime(2020, 1, 1), datetime(2020, 1, 1)]
+    assert np.allclose(sw_vel.get_years_since_start(datetimes), [0.0, 0.0, 0.0])
+
+    datetimes = [datetime(2020, 1, 1), datetime(2021, 1, 1), datetime(2022, 1, 1)]
+    assert np.allclose(sw_vel.get_years_since_start(datetimes).round(2), [0.0, 1.0, 2.0])
+
+
 def test_linear_regression_leastsquares():
     x = np.arange(10, dtype='float64')
     y = np.arange(10, dtype='float64')
     slope, intercept = sw_vel.linear_regression_leastsquares(x, y)
@@ -30,8 +44,6 @@ def test_linear_regression_leastsquares():
     assert np.isclose(intercept, 0.0, atol=1e-6)
 
-
-
 def test_parallel_linear_regression():
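
For reference, the commented-out "Non-numba" path above is equivalent to a vectorized `np.polyfit` over flattened pixels; a small demonstration with synthetic values:

```python
import numpy as np

t = np.arange(4, dtype='float64')                      # years since start
stack = np.tile((0.02 * t)[:, None, None], (1, 8, 8))  # (time, y, x), 0.02 m/yr
coeffs = np.polyfit(t, stack.reshape(4, -1), deg=1)    # columns fit independently
slopes = coeffs[0].reshape(8, 8)                       # ~0.02 everywhere
assert np.allclose(slopes, 0.02)
```
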
From 0844c958546bb52cc74c17a43616aae730c63686 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 14:39:58 +0000
Subject: [PATCH 13/28] update changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c34044b..3ac6613 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.3.0]
+### Added
+* Ability to generate SW Disp velocity tiles
 
 ## [0.2.0]
 ### Added
From 55d9fe206a7840765e892d52802834480bcd4064 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 21:23:57 +0000
Subject: [PATCH 14/28] fully working

---
 environment.yml                            |  1 +
 pyproject.toml                             |  1 +
 src/opera_disp_tms/generate_sw_vel_tile.py | 22 ++++++++--------------
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/environment.yml b/environment.yml
index 163abc2..7d6415b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,5 +26,6 @@ dependencies:
   - h5netcdf
   - xarray
   - rioxarray
+  - numba
   - s3fs
   - cachetools

diff --git a/pyproject.toml b/pyproject.toml
index 8a5d15d..0d41ebd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
     "h5netcdf",
     "rioxarray",
     "xarray",
+    "numba",
     "cachetools",
 ]
 dynamic = ["version", "readme"]

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index 24fad10..3919804 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -32,7 +32,7 @@ def get_years_since_start(datetimes: List[datetime]) -> np.ndarray:
     return yrs_since_start
 
 
-@njit(cache=True)
+@njit
 def linear_regression_leastsquares(x: np.ndarray, y: np.ndarray) -> tuple:
     """Based on the scipy.stats.linregress implementation:
     https://github.com/scipy/scipy/blob/v1.14.1/scipy/stats/_stats_py.py#L10752-L10947
@@ -62,19 +62,19 @@ def linear_regression_leastsquares(x: np.ndarray, y: np.ndarray) -> tuple:
     return slope, intercept
 
 
-@njit(parallel=True, cache=True)
+@njit(parallel=True)
 def parallel_linear_regression(x: np.ndarray, y: np.ndarray) -> np.ndarray:
     """Run linear regressions in parallel for each pixel in the x array
 
     Args:
-        x: A 3D array of independent variables with dimensions (time, y, x)
-        y: A 1D array of dependent variables (i.e., time steps)
+        x: A 1D array of independent variables (i.e., time steps)
+        y: A 3D array of dependent variables with dimensions (time, y, x)
     """
-    n, m, p = x.shape
+    n, m, p = y.shape
     slope_array = np.zeros((m, p))
     for i in prange(m):
         for j in prange(p):
-            slope, intercept = linear_regression_leastsquares(x[:, i, j].copy(), y)
+            slope, intercept = linear_regression_leastsquares(x, y[:, i, j].copy())
             slope_array[i, j] = slope
     return slope_array
 
@@ -106,15 +106,9 @@ def add_velocity_data_to_array(
     years_since_start = get_years_since_start([g.attrs['secondary_date'] for g in granule_xrs])
     cube = cube.assign_coords(years_since_start=years_since_start)
     new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref}
 
-    slope = parallel_linear_regression(cube.data.astype('float64'), cube.years_since_start.data.astype('float64'))
+    # Using xarray's polyfit is 13x slower when running a regression for 44 time steps
+    slope = parallel_linear_regression(cube.years_since_start.data.astype('float64'), cube.data.astype('float64'))
     slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords)
-
-    # Non-numba version is 4x slower
-    # non_nan_count = cube.count(dim='years_since_start')
-    # cube = cube.where(non_nan_count >= 2, np.nan)
-    # linear_fit = cube.polyfit(dim='years_since_start', deg=1)
-    # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) * 100
-
     velocity = xr.Dataset({'velocity': slope_da}, new_coords)
     velocity.attrs = cube.attrs
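
After the argument swap in PATCH 14, the time axis is passed first and the data cube second. A synthetic shape check (values assumed; `parallel_linear_regression` is the function defined in the patch):

```python
import numpy as np

x = np.array([0.0, 0.5, 1.0, 1.5])                 # 1D years since start
y = np.tile((0.02 * x)[:, None, None], (1, 8, 8))  # (time, y, x) stack
# parallel_linear_regression(x, y) -> (8, 8) array of ~0.02 m/yr
```
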
From 55d9fe206a7840765e892d52802834480bcd4064 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 21:23:57 +0000
Subject: [PATCH 14/28] fully working

---
 environment.yml                            |  1 +
 pyproject.toml                             |  1 +
 src/opera_disp_tms/generate_sw_vel_tile.py | 22 ++++++++--------------
 3 files changed, 10 insertions(+), 14 deletions(-)

diff --git a/environment.yml b/environment.yml
index 163abc2..7d6415b 100644
--- a/environment.yml
+++ b/environment.yml
@@ -26,5 +26,6 @@ dependencies:
   - h5netcdf
   - xarray
   - rioxarray
+  - numba
   - s3fs
   - cachetools

diff --git a/pyproject.toml b/pyproject.toml
index 8a5d15d..0d41ebd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
     "h5netcdf",
     "rioxarray",
     "xarray",
+    "numba",
     "cachetools",
 ]
 dynamic = ["version", "readme"]

diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py
index 24fad10..3919804 100644
--- a/src/opera_disp_tms/generate_sw_vel_tile.py
+++ b/src/opera_disp_tms/generate_sw_vel_tile.py
@@ -32,7 +32,7 @@ def get_years_since_start(datetimes: List[datetime]) -> np.ndarray:
     return yrs_since_start
 
 
-@njit(cache=True)
+@njit
 def linear_regression_leastsquares(x: np.ndarray, y: np.ndarray) -> tuple:
     """Based on the scipy.stats.linregress implementation:
     https://github.com/scipy/scipy/blob/v1.14.1/scipy/stats/_stats_py.py#L10752-L10947
@@ -62,19 +62,19 @@ def linear_regression_leastsquares(x: np.ndarray, y: np.ndarray) -> tuple:
     return slope, intercept
 
 
-@njit(parallel=True, cache=True)
+@njit(parallel=True)
 def parallel_linear_regression(x: np.ndarray, y: np.ndarray) -> np.ndarray:
     """Run linear regressions in parallel for each pixel in the y array
 
     Args:
-        x: A 3D array of independent variables with dimensions (time, y, x)
-        y: A 1D array of dependent variables (i.e., time steps)
+        x: A 1D array of independent variables (i.e., time steps)
+        y: A 3D array of dependent variables with dimensions (time, y, x)
     """
-    n, m, p = x.shape
+    n, m, p = y.shape
     slope_array = np.zeros((m, p))
     for i in prange(m):
         for j in prange(p):
-            slope, intercept = linear_regression_leastsquares(x[:, i, j].copy(), y)
+            slope, intercept = linear_regression_leastsquares(x, y[:, i, j].copy())
             slope_array[i, j] = slope
     return slope_array
 
@@ -106,15 +106,9 @@ def add_velocity_data_to_array(
     cube = cube.assign_coords(years_since_start=years_since_start)
 
     new_coords = {'x': cube.x, 'y': cube.y, 'spatial_ref': cube.spatial_ref}
-    slope = parallel_linear_regression(cube.data.astype('float64'), cube.years_since_start.data.astype('float64'))
+    # Using xarray's polyfit is 13x slower when running a regression for 44 time steps
+    slope = parallel_linear_regression(cube.years_since_start.data.astype('float64'), cube.data.astype('float64'))
     slope_da = xr.DataArray(slope, dims=('y', 'x'), coords=new_coords)
-
-    # Non-numba version is 4x slower
-    # non_nan_count = cube.count(dim='years_since_start')
-    # cube = cube.where(non_nan_count >= 2, np.nan)
-    # linear_fit = cube.polyfit(dim='years_since_start', deg=1)
-    # slope_da = linear_fit.polyfit_coefficients.sel(degree=1) * 100
-
     velocity = xr.Dataset({'velocity': slope_da}, new_coords)
     velocity.attrs = cube.attrs

From 0cc0950b2a8251fb425b68aa22cde19d91dc2cea Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Mon, 21 Oct 2024 17:04:47 -0500
Subject: [PATCH 15/28] fix test

---
 tests/test_generate_sw_vel_tile.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_generate_sw_vel_tile.py b/tests/test_generate_sw_vel_tile.py
index e67b99a..9abf33c 100644
--- a/tests/test_generate_sw_vel_tile.py
+++ b/tests/test_generate_sw_vel_tile.py
@@ -45,12 +45,12 @@ def test_linear_regression_leastsquares():
 
 def test_parallel_linear_regression():
-    x_col = np.arange(3, dtype='float64')
-    x = np.ones((3, 5, 5), dtype='float64') * -1
+    y_col = np.arange(3, dtype='float64')
+    y = np.ones((3, 5, 5), dtype='float64') * -1
     indexes = np.arange(5)
     for i in indexes:
         for j in indexes:
-            x[:, i, j] = x_col
-    y = np.arange(3, dtype='float64')
+            y[:, i, j] = y_col
+    x = np.arange(3, dtype='float64')
     slope = sw_vel.parallel_linear_regression(x, y)
     assert np.all(np.isclose(slope, 1.0, atol=1e-6))

From 7815560ddf6c99dca6f93324e2ff51a9bb621aa1 Mon Sep 17 00:00:00 2001
From: Forrest Williams <31411324+forrestfwilliams@users.noreply.github.com>
Date: Tue, 22 Oct 2024 07:27:39 -0500
Subject: [PATCH 16/28] Update src/opera_disp_tms/generate_sw_disp_tile.py

Co-authored-by: Jacquelyn Smale <34557291+jacquelynsmale@users.noreply.github.com>
---
 src/opera_disp_tms/generate_sw_disp_tile.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/opera_disp_tms/generate_sw_disp_tile.py b/src/opera_disp_tms/generate_sw_disp_tile.py
index 99ed1e4..6473b42 100644
--- a/src/opera_disp_tms/generate_sw_disp_tile.py
+++ b/src/opera_disp_tms/generate_sw_disp_tile.py
@@ -72,7 +72,7 @@ def find_needed_granules(
         frame_ids: The frame ids to generate the tile for
         begin_date: Start of secondary date search range to generate tile for
         end_date: End of secondary date search range to generate tile for
-        strategy: Selection strategy for granules within search range ("minmax" or "all")
+        strategy: Selection strategy for granules within search date range ("max", "minmax" or "all")
             - Use "max" to get first granule
            - Use "minmax" to get first and last granules
            - Use "all" to get all granules
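Patch 16 above clarifies the `strategy` argument of `find_needed_granules`. Per the updated `test_find_needed_granules` in patch 12, `"max"` appears to keep the single granule with the most recent secondary date, `"minmax"` keeps the two endpoints of the window, and `"all"` keeps everything. A toy illustration with hypothetical stand-ins for the real `Granule` class:

```python
from datetime import datetime

# Hypothetical (name, secondary_date) stand-ins, not the real Granule objects.
granules = [('g1', datetime(2021, 1, 1)), ('g2', datetime(2021, 1, 2)), ('g3', datetime(2021, 1, 3))]
ordered = sorted(granules, key=lambda g: g[1])

selected = {
    'max': ordered[-1:],                  # single granule, latest secondary date
    'minmax': [ordered[0], ordered[-1]],  # first and last granules in the range
    'all': ordered,                       # every granule in the search range
}
assert [g[0] for g in selected['minmax']] == ['g1', 'g3']
```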
From 3c0dbeca0d490a0f8efbc229822d4ebfe587aa78 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Tue, 22 Oct 2024 16:12:37 +0000
Subject: [PATCH 17/28] optimize fsspec settings

---
 src/opera_disp_tms/s3_xarray.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/opera_disp_tms/s3_xarray.py b/src/opera_disp_tms/s3_xarray.py
index bb58322..b10c2c2 100644
--- a/src/opera_disp_tms/s3_xarray.py
+++ b/src/opera_disp_tms/s3_xarray.py
@@ -11,13 +11,13 @@
 
 IO_PARAMS = {
     'fsspec_params': {
-        # "skip_instance_cache": True
-        'cache_type': 'blockcache',  # or "first" with enough space
+        'skip_instance_cache': True,
+        'cache_type': 'first',
         'block_size': 8 * 1024 * 1024,  # could be bigger
     },
     'h5py_params': {
         'driver_kwds': {  # only recent versions of xarray and h5netcdf allow this correctly
-            'page_buf_size': 16 * 1024 * 1024,  # this one only works in repacked files
+            'page_buf_size': 32 * 1024 * 1024,  # this one only works in repacked files
             'rdcc_nbytes': 8 * 1024 * 1024,  # this one is to read the chunks
         }
     },

From 1686caf4ff15bbeec0c0812444a7fbe6bad9d2cd Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Tue, 22 Oct 2024 17:59:29 +0000
Subject: [PATCH 18/28] small performance enhancement

---
 src/opera_disp_tms/generate_sw_disp_tile.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/opera_disp_tms/generate_sw_disp_tile.py b/src/opera_disp_tms/generate_sw_disp_tile.py
index 99ed1e4..f4ad759 100644
--- a/src/opera_disp_tms/generate_sw_disp_tile.py
+++ b/src/opera_disp_tms/generate_sw_disp_tile.py
@@ -130,6 +130,7 @@ def load_sw_disp_granule(granule: Granule, bbox: Iterable[int], frame: FrameMeta
         raise NotImplementedError('Granule reference date does not match metadata tile, this is not supported.')
 
     granule_xr = granule_xr.rio.clip_box(*bbox, crs='EPSG:3857')
+    granule_xr = granule_xr.load()
     valid_data_mask = granule_xr['connected_component_labels'] > 0
     sw_cumul_disp_xr = granule_xr['short_wavelength_displacement'].where(valid_data_mask, np.nan)
     sw_cumul_disp_xr.attrs = granule_xr.attrs

From 081f61dccb06240d8746ad807f67080697c2723b Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Tue, 22 Oct 2024 19:52:40 +0000
Subject: [PATCH 19/28] move create_product_name

---
 src/opera_disp_tms/generate_sw_disp_tile.py | 24 ++------------------
 src/opera_disp_tms/generate_sw_vel_tile.py  |  4 ++--
 src/opera_disp_tms/utils.py                 | 20 +++++++++++++++++
 tests/test_generate_sw_disp_tile.py         |  8 -------
 tests/test_utils.py                         |  8 +++++++
 5 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/opera_disp_tms/generate_sw_disp_tile.py b/src/opera_disp_tms/generate_sw_disp_tile.py
index 6039b0c..12998aa 100644
--- a/src/opera_disp_tms/generate_sw_disp_tile.py
+++ b/src/opera_disp_tms/generate_sw_disp_tile.py
@@ -12,7 +12,7 @@
 
 from opera_disp_tms.find_california_dataset import Granule, find_california_dataset
 from opera_disp_tms.s3_xarray import open_opera_disp_granule
-from opera_disp_tms.utils import DATE_FORMAT, create_buffered_bbox, get_raster_as_numpy, round_to_day
+from opera_disp_tms.utils import DATE_FORMAT, create_buffered_bbox, create_tile_name, get_raster_as_numpy, round_to_day
 
 gdal.UseExceptions()
 
@@ -101,26 +101,6 @@ def find_needed_granules(
     return needed_granules
 
 
-def create_product_name(
-    metadata_name: str, begin_date: datetime, end_date: datetime, prod_type: str = 'SW_CUMUL_DISP'
-) -> str:
-    """Create a product name for a short wavelength cumulative displacement tile
-    Takes the form: SW_CUMUL_DISP_YYYYMMDD_YYYYMMDD_DIRECTION_TILECOORDS.tif
-
-    Args:
-        metadata_name: The name of the metadata file
-        begin_date: Start of secondary date search range to generate tile for
-        end_date: End of secondary date search range to generate tile for
-        prod_type: Product type prefix to use
-    """
-    _, flight_direction, tile_coordinates = metadata_name.split('_')
-    date_fmt = '%Y%m%d'
-    begin_date = datetime.strftime(begin_date, date_fmt)
-    end_date = datetime.strftime(end_date, date_fmt)
-    name = '_'.join([prod_type, begin_date, end_date, flight_direction, tile_coordinates])
-    return name
-
-
 def load_sw_disp_granule(granule: Granule, bbox: Iterable[int], frame: FrameMeta) -> xr.DataArray:
     datasets = 
['short_wavelength_displacement', 'connected_component_labels'] granule_xr = open_opera_disp_granule(granule.s3_uri, datasets) @@ -181,7 +161,7 @@ def create_sw_disp_tile(metadata_path: Path, begin_date: datetime, end_date: dat if begin_date > end_date: raise ValueError('Begin date must be before end date') - product_name = create_product_name(metadata_path.name, begin_date, end_date) + product_name = create_tile_name(metadata_path.name, begin_date, end_date) product_path = Path.cwd() / product_name print(f'Generating tile {product_name}') diff --git a/src/opera_disp_tms/generate_sw_vel_tile.py b/src/opera_disp_tms/generate_sw_vel_tile.py index 3919804..1dfaff5 100644 --- a/src/opera_disp_tms/generate_sw_vel_tile.py +++ b/src/opera_disp_tms/generate_sw_vel_tile.py @@ -11,7 +11,7 @@ from opera_disp_tms import generate_sw_disp_tile as sw_disp from opera_disp_tms.find_california_dataset import Granule -from opera_disp_tms.utils import create_buffered_bbox, get_raster_as_numpy +from opera_disp_tms.utils import create_buffered_bbox, create_tile_name, get_raster_as_numpy gdal.UseExceptions() @@ -126,7 +126,7 @@ def create_sw_vel_tile(metadata_path: Path, begin_date: datetime, end_date: date if begin_date > end_date: raise ValueError('Begin date must be before end date') - product_name = sw_disp.create_product_name(metadata_path.name, begin_date, end_date, 'SW_VELOCITY') + product_name = create_tile_name(metadata_path.name, begin_date, end_date, 'SW_VELOCITY') product_path = Path.cwd() / product_name print(f'Generating tile {product_name}') diff --git a/src/opera_disp_tms/utils.py b/src/opera_disp_tms/utils.py index 5b0d3d0..bd7cf2b 100644 --- a/src/opera_disp_tms/utils.py +++ b/src/opera_disp_tms/utils.py @@ -142,3 +142,23 @@ def validate_bbox(bbox: Iterable[int]) -> None: if bbox[1] > bbox[3]: raise ValueError('Bounding box miny is greater than maxy') + + +def create_tile_name( + metadata_name: str, begin_date: datetime, end_date: datetime, prod_type: str = 'SW_CUMUL_DISP' +) -> str: + """Create a product name for a short wavelength cumulative displacement tile + Takes the form: SW_CUMUL_DISP_YYYYMMDD_YYYYMMDD_DIRECTION_TILECOORDS.tif + + Args: + metadata_name: The name of the metadata file + begin_date: Start of secondary date search range to generate tile for + end_date: End of secondary date search range to generate tile for + prod_type: Product type prefix to use + """ + _, flight_direction, tile_coordinates = metadata_name.split('_') + date_fmt = '%Y%m%d' + begin_date = datetime.strftime(begin_date, date_fmt) + end_date = datetime.strftime(end_date, date_fmt) + name = '_'.join([prod_type, begin_date, end_date, flight_direction, tile_coordinates]) + return name diff --git a/tests/test_generate_sw_disp_tile.py b/tests/test_generate_sw_disp_tile.py index 545eddf..f3f4a57 100644 --- a/tests/test_generate_sw_disp_tile.py +++ b/tests/test_generate_sw_disp_tile.py @@ -72,11 +72,3 @@ def test_find_needed_granules(): assert list(needed_granules.keys()) == [1, 2] assert needed_granules[1] == [granules[1]] assert needed_granules[2] == [granules[3]] - - -def test_create_product_name(): - metadata_name = 'METADATA_ASCENDING_N41W124.tif' - begin_date = datetime(2021, 1, 1) - end_date = datetime(2021, 1, 2) - product_name = sw.create_product_name(metadata_name, begin_date, end_date) - assert product_name == 'SW_CUMUL_DISP_20210101_20210102_ASCENDING_N41W124.tif' diff --git a/tests/test_utils.py b/tests/test_utils.py index f225faa..1363998 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ 
-46,3 +46,11 @@ def test_validate_bbox(): ut.validate_bbox([1, 4, 3, 2]) ut.validate_bbox([1, 2, 3, 4]) + + +def test_create_product_name(): + metadata_name = 'METADATA_ASCENDING_N41W124.tif' + begin_date = datetime(2021, 1, 1) + end_date = datetime(2021, 1, 2) + product_name = ut.create_tile_name(metadata_name, begin_date, end_date) + assert product_name == 'SW_CUMUL_DISP_20210101_20210102_ASCENDING_N41W124.tif' From bbe8196bd670a150b61a926135e302c97deacbf2 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 19:56:59 +0000 Subject: [PATCH 20/28] update readme --- README.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 95ea3e8..a3b6b89 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Package for [OPERA DISP](https://www.jpl.nasa.gov/go/opera/products/disp-product To run all commands in sequence use: ```bash -git clone https://github.com/ASFHyP3/opera-disp-tms.git +git clone https://github.com/ASFHyP3/opera-disp-tms.git cd OPERA-DISP-TMS mamba env create -f environment.yml mamba activate opera-disp-tms @@ -66,6 +66,20 @@ The resulting products have the name format: For example: `SW_CUMUL_DISP_20170901_20171231_ASCENDING_W125N42.tif` +### Create a Short Wavelength Velocity tile +The `generate_sw_vel_tile` CLI command can be used to generate a short wavelength velocity geotiff: +```bash +generate_sw_vel_tile METADATA_ASCENDING_W125N42.tif 20170901 20171231 +``` +Where `METADATA_ASCENDING_W125N42.tif` is the path to the frame metadata tile you want to generate a Short Wavelength Velocity tile for, and `20170901`/`20171231` specify the start/end of the secondary date search range to generate a tile for in format `%Y%m%d`. + +By default, the velocity will be calculated with **only** two data points (the first and last dates in the search range), but you can pass the optional `--full` flag to calculate the velocity using all available data in the search range. Using this option will significantly increase processing times. 
+ +The resulting products have the name format: +`SW_VEL_DISP_{start date search range}_{stop data search range}_{orbit direction}_{upper left corner in lon/lat}.tif` +For example: +`SW_VEL_DISP_20170901_20171231_ASCENDING_W125N42.tif` + ### Create a Tile Map The `create_tile_map` CLI command generates a directory with small .png tiles from a list of rasters in a common projection, following the OSGeo Tile Map Service Specification, using gdal2tiles: https://gdal.org/en/latest/programs/gdal2tiles.html From 8f4a14dd6fdd40442e35dc0ea17458fdc8745720 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 20:10:38 +0000 Subject: [PATCH 21/28] allow creation of velocity tileset using script --- scripts/make_cal_sw_disp_tiles.py | 26 +++++++++++++++++++------- src/opera_disp_tms/create_tile_map.py | 19 +++++++++++-------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/scripts/make_cal_sw_disp_tiles.py b/scripts/make_cal_sw_disp_tiles.py index 62f3228..e149854 100644 --- a/scripts/make_cal_sw_disp_tiles.py +++ b/scripts/make_cal_sw_disp_tiles.py @@ -3,27 +3,39 @@ from pathlib import Path from opera_disp_tms.create_tile_map import create_tile_map -from opera_disp_tms.generate_sw_disp_tile import create_product_name, create_sw_disp_tile +from opera_disp_tms.generate_sw_disp_tile import create_sw_disp_tile +from opera_disp_tms.generate_sw_vel_tile import create_sw_vel_tile +from opera_disp_tms.utils import create_tile_name -def make_cal_sw_disp_tiles(meta_tile_dir: Path, begin_date: datetime, end_date: datetime): +def make_cal_tiles(meta_tile_dir: Path, workflow: str, begin_date: datetime, end_date: datetime): + workflow_opts = { + 'displacement': ['SW_CUMUL_DISP', create_sw_disp_tile, None], + 'velocity': ['SW_VEL', create_sw_vel_tile, [-0.05, 0.05]], + } + prod_type, create_tile_func, scale_range = workflow_opts[workflow] tiles = list(meta_tile_dir.glob('*.tif')) for tile in tiles: - product_name = Path(create_product_name(tile.name, begin_date, end_date)) + product_name = Path(create_tile_name(tile.name, begin_date, end_date, prod_type)) if product_name.exists(): print(f'{product_name} already exists. 
Skipping.')
             continue
-        create_sw_disp_tile(tile, begin_date, end_date)
+        create_tile_func(tile, begin_date, end_date)
 
-    sw_disp_tiles = [str(x) for x in Path.cwd().glob('*.tif')]
+    tiles = [str(x) for x in Path.cwd().glob('*.tif')]
     tileset_dir = Path.cwd() / 'tiles'
     tileset_dir.mkdir(exist_ok=True)
-    create_tile_map(str(tileset_dir), sw_disp_tiles)
+
+    create_tile_map(str(tileset_dir), tiles, scale_range)
 
 
 def main():
     parser = argparse.ArgumentParser(description='Create a short wavelength cumulative displacement tile')
     parser.add_argument('meta_tile_dir', type=str, help='Path to the metadata tile')
+    parser.add_argument(
+        'workflow', type=str, options=['displacement', 'velocity'], help='Workflow to run (displacement or velocity)'
+    )
     parser.add_argument(
         '--begin_date',
         type=str,
@@ -41,7 +53,7 @@ def main():
     args.meta_tile_dir = Path(args.meta_tile_dir)
     args.begin_date = datetime.strptime(args.begin_date, '%Y%m%d')
     args.end_date = datetime.strptime(args.end_date, '%Y%m%d')
-    make_cal_sw_disp_tiles(args.meta_tile_dir, args.begin_date, args.end_date)
+    make_cal_tiles(args.meta_tile_dir, args.workflow, args.begin_date, args.end_date)
 
 
 if __name__ == '__main__':
diff --git a/src/opera_disp_tms/create_tile_map.py b/src/opera_disp_tms/create_tile_map.py
index 5927ed4..bcde303 100644
--- a/src/opera_disp_tms/create_tile_map.py
+++ b/src/opera_disp_tms/create_tile_map.py
@@ -4,9 +4,11 @@
 import subprocess
 import tempfile
 from pathlib import Path
+from typing import List
 
 from osgeo import gdal, gdalconst, osr
 
+
 gdal.UseExceptions()
 
 
@@ -21,22 +23,20 @@ def get_tile_extent(info: dict, output_folder: Path) -> None:
     minx, miny = info['cornerCoordinates']['lowerLeft']
     maxx, maxy = info['cornerCoordinates']['upperRight']
     proj = osr.SpatialReference(info['coordinateSystem']['wkt'])
-    extent = {
-        "extent": [minx, miny, maxx, maxy],
-        "EPSG": int(proj.GetAttrValue('AUTHORITY', 1))
-    }
+    extent = {'extent': [minx, miny, maxx, maxy], 'EPSG': int(proj.GetAttrValue('AUTHORITY', 1))}
 
     with open(output_folder / 'extent.json', 'w') as outfile:
         json.dump(extent, outfile)
 
 
-def create_tile_map(output_folder: str, input_rasters: list[str]):
+def create_tile_map(output_folder: str, input_rasters: list[str], scale_range: List[float] = None) -> None:
     """Generate a directory with small .png tiles from a list of rasters in a common projection,
     following the OSGeo Tile Map Service Specification, using gdal2tiles:
     https://gdal.org/en/latest/programs/gdal2tiles.html
 
     Args:
        output_folder: Path of the output directory to create
        input_rasters: List of gdal-compatible raster paths to mosaic
+       scale_range: Optional list of two floats to scale the mosaic by
     """
     with tempfile.NamedTemporaryFile() as mosaic_vrt, tempfile.NamedTemporaryFile() as byte_vrt:
         # mosaic the input rasters
@@ -49,12 +49,15 @@ def create_tile_map(output_folder: str, input_rasters: list[str]):
         # get bounds of VRT and write to file
         get_tile_extent(vrt_info, Path(output_folder))
 
+        if scale_range is None:
+            scale_range = [stats['STATISTICS_MINIMUM'], stats['STATISTICS_MAXIMUM']]
+
         gdal.Translate(
             destName=byte_vrt.name,
             srcDS=mosaic_vrt.name,
             format='VRT',
             outputType=gdalconst.GDT_Byte,
-            scaleParams=[[stats['STATISTICS_MINIMUM'], stats['STATISTICS_MAXIMUM']]],
+            scaleParams=[scale_range],
             resampleAlg='nearest',
         )
 
@@ -75,8 +78,8 @@ def create_tile_map(output_folder: str, input_rasters: list[str]):
 def main():
     parser = argparse.ArgumentParser(
         description='Generate a directory with small .png tiles from a list of rasters in a common projection, '
-                    'following
the OSGeo Tile Map Service Specification, using gdal2tiles: ' - 'https://gdal.org/en/latest/programs/gdal2tiles.html' + 'following the OSGeo Tile Map Service Specification, using gdal2tiles: ' + 'https://gdal.org/en/latest/programs/gdal2tiles.html' ) parser.add_argument('output_folder', type=str, help='Path of the output directory to create') parser.add_argument('input_rasters', type=str, nargs='+', help='List of gdal-compatible raster paths to mosaic') From 046e2fe62d855b61971f4f5d2451e40348ae812a Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 20:11:06 +0000 Subject: [PATCH 22/28] rename file --- scripts/{make_cal_sw_disp_tiles.py => make_cal_tiles.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/{make_cal_sw_disp_tiles.py => make_cal_tiles.py} (100%) diff --git a/scripts/make_cal_sw_disp_tiles.py b/scripts/make_cal_tiles.py similarity index 100% rename from scripts/make_cal_sw_disp_tiles.py rename to scripts/make_cal_tiles.py From d3528a0447f360f69624eea4de6d659abb0ead33 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 20:33:41 +0000 Subject: [PATCH 23/28] fix rename bug --- scripts/make_cal_meta_tiles.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/make_cal_meta_tiles.py b/scripts/make_cal_meta_tiles.py index d3bf9c1..48c4a2a 100644 --- a/scripts/make_cal_meta_tiles.py +++ b/scripts/make_cal_meta_tiles.py @@ -1,7 +1,7 @@ import argparse from pathlib import Path -from opera_disp_tms.generate_frame_tile import create_product_name, create_tile_for_bbox +from opera_disp_tms.generate_metadata_tile import create_product_name, create_tile_for_bbox def make_cal_meta_tiles(orbit_direction): From 1329672699dd36293f12cf9133cb5fbf37e12ef6 Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 20:50:36 +0000 Subject: [PATCH 24/28] small improvements to scripts --- scripts/make_cal_tiles.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/make_cal_tiles.py b/scripts/make_cal_tiles.py index e149854..f7036cb 100644 --- a/scripts/make_cal_tiles.py +++ b/scripts/make_cal_tiles.py @@ -34,19 +34,19 @@ def main(): parser = argparse.ArgumentParser(description='Create a short wavelength cumulative displacement tile') parser.add_argument('meta_tile_dir', type=str, help='Path to the metadata tile') parser.add_argument( - 'workflow', type=str, options=['displacement', 'velocity'], help='Workflow to run (displacement or velocity)' + 'workflow', type=str, choices=['displacement', 'velocity'], help='Workflow to run (displacement or velocity)' ) parser.add_argument( - '--begin_date', + '--begin-date', type=str, default='20170101', help='Start of secondary date search range to generate tile for (e.g., 20240101)', ) parser.add_argument( - '--end_date', + '--end-date', type=str, default='20171231', - help='End of secondary date search range to generate tile for (e.g., 20240101', + help='End of secondary date search range to generate tile for (e.g., 20240101)', ) args = parser.parse_args() From 1221f5c91441bca480222ca126b33ca16565173f Mon Sep 17 00:00:00 2001 From: Forrest Williams Date: Tue, 22 Oct 2024 21:43:49 +0000 Subject: [PATCH 25/28] fix product name bug --- scripts/make_cal_tiles.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/make_cal_tiles.py b/scripts/make_cal_tiles.py index f7036cb..909440a 100644 --- a/scripts/make_cal_tiles.py +++ b/scripts/make_cal_tiles.py @@ -11,7 +11,7 @@ def make_cal_tiles(meta_tile_dir: Path, workflow: str, 
begin_date: datetime, end_date: datetime):
     workflow_opts = {
         'displacement': ['SW_CUMUL_DISP', create_sw_disp_tile, None],
-        'velocity': ['SW_VEL', create_sw_vel_tile, [-0.05, 0.05]],
+        'velocity': ['SW_VELOCITY', create_sw_vel_tile, [-0.05, 0.05]],
     }
     prod_type, create_tile_func, scale_range = workflow_opts[workflow]
     tiles = list(meta_tile_dir.glob('*.tif'))
@@ -20,7 +20,6 @@ def make_cal_tiles(meta_tile_dir: Path, workflow: str, begin_date: datetime, end
         if product_name.exists():
             print(f'{product_name} already exists. Skipping.')
             continue
-
         create_tile_func(tile, begin_date, end_date)
 
     tiles = [str(x) for x in Path.cwd().glob('*.tif')]

From f931b7578948eb05ca194c584105c0472ce886f0 Mon Sep 17 00:00:00 2001
From: Forrest Williams
Date: Wed, 23 Oct 2024 08:57:34 -0500
Subject: [PATCH 26/28] update changelog and readme for release

---
 CHANGELOG.md | 8 +++++++-
 README.md    | 4 ++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ac6613..fe6dea2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,7 +8,13 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [0.3.0]
 ### Added
-* Ability to generate SW Disp velocity tiles
+* Ability to generate SW velocity tiles in `generate_sw_vel_tile.py`
+
+### Changed
+* California dataset scripts to allow for generation of SW velocity tiles
+* Rename `generate_frame_tile.py` to `generate_metadata_tile.py`
+* Simplified CLI and filenames of metadata tiles so that location is specified by the upper-left corner of the tile, not the full bounding box
+* Expanded/modified some functions in `generate_sw_disp_tile.py` so that they could be reused by `generate_sw_vel_tile.py`
 
 ## [0.2.0]
 ### Added

diff --git a/README.md b/README.md
index 3463a08..9d31507 100644
--- a/README.md
+++ b/README.md
@@ -76,9 +76,9 @@ Where `METADATA_ASCENDING_W125N42.tif` is the path to the frame metadata tile yo
 
 By default, the velocity will be calculated with **only** two data points (the first and last dates in the search range), but you can pass the optional `--full` flag to calculate the velocity using all available data in the search range. Using this option will significantly increase processing times.
 
 The resulting products have the name format:
-`SW_VEL_DISP_{start date search range}_{stop data search range}_{orbit direction}_{upper left corner in lon/lat}.tif`
+`SW_VELOCITY_DISP_{start date search range}_{stop date search range}_{orbit direction}_{upper left corner in lon/lat}.tif`
 For example:
-`SW_VEL_DISP_20170901_20171231_ASCENDING_W125N42.tif`
+`SW_VELOCITY_DISP_20170901_20171231_ASCENDING_W125N42.tif`
 
 ### Create a Tile Map
 The `create_tile_map` CLI command generates a directory with small .png tiles from a list of rasters in a common projection, following the OSGeo Tile Map Service Specification, using gdal2tiles: https://gdal.org/en/latest/programs/gdal2tiles.html
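The README text finalized in patch 26 contrasts the default two-point velocity (first and last epochs) with the `--full` regression over every epoch. A quick synthetic check of the trade-off — the two estimates agree on clean data, while the full fit averages down noise (illustrative numbers only, not project code):

```python
import numpy as np

rng = np.random.default_rng(0)
t = np.linspace(0.0, 2.0, 45)                      # decimal years for ~45 epochs
disp = 0.03 * t + rng.normal(0.0, 0.005, t.size)   # synthetic 0.03 m/yr signal plus noise

two_point = (disp[-1] - disp[0]) / (t[-1] - t[0])  # the default: first and last dates only
full = np.polyfit(t, disp, 1)[0]                   # the --full option: regression over all epochs

print(two_point, full)  # the full fit is typically closer to the true 0.03
```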
From 71998f05f9de73aeb9947c83e0300b15efb74a58 Mon Sep 17 00:00:00 2001
From: jacquelynsmale
Date: Wed, 23 Oct 2024 06:12:55 -0800
Subject: [PATCH 27/28] switch order of functions

---
 src/opera_disp_tms/create_tile_map.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/opera_disp_tms/create_tile_map.py b/src/opera_disp_tms/create_tile_map.py
index bcde303..752ce5d 100644
--- a/src/opera_disp_tms/create_tile_map.py
+++ b/src/opera_disp_tms/create_tile_map.py
@@ -46,9 +46,6 @@ def create_tile_map(output_folder: str, input_rasters: list[str], scale_range: L
     vrt_info = gdal.Info(mosaic_vrt.name, stats=True, format='json')
     stats = vrt_info['bands'][0]['metadata']['']
 
-    # get bounds of VRT and write to file
-    get_tile_extent(vrt_info, Path(output_folder))
-
     if scale_range is None:
         scale_range = [stats['STATISTICS_MINIMUM'], stats['STATISTICS_MAXIMUM']]
 
@@ -74,6 +71,9 @@ def create_tile_map(output_folder: str, input_rasters: list[str], scale_range: L
     ]
     subprocess.run(command)
 
+    # get bounds of VRT and write to file
+    get_tile_extent(vrt_info, Path(output_folder))
+
 
 def main():
     parser = argparse.ArgumentParser(

From 24df0ff50b2054b88efe4b0817c94fc43f42b9a1 Mon Sep 17 00:00:00 2001
From: jacquelynsmale
Date: Wed, 23 Oct 2024 06:17:09 -0800
Subject: [PATCH 28/28] update changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ac6613..ba63f02 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,9 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 ### Added
 * Ability to generate SW Disp velocity tiles
 
+### Fixed
+* `extent.json` will be written during `create_tile_map.py` even if the directory `tiles/` does not exist prior to running
+
 ## [0.2.0]
 ### Added
 * Python scripts for generating California SW Disp tileset
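Patches 27 and 28 address an ordering problem: `get_tile_extent` writes `extent.json` into the output folder, which may not exist until gdal2tiles creates it, so writing first can raise `FileNotFoundError`. A self-contained illustration of the failure mode (not project code):

```python
import json
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    out = Path(tmp) / 'tiles'  # like the tileset folder before gdal2tiles runs
    payload = {'extent': [0.0, 0.0, 1.0, 1.0], 'EPSG': 3857}

    try:
        with open(out / 'extent.json', 'w') as f:  # parent directory missing
            json.dump(payload, f)
    except FileNotFoundError as error:
        print(f'writing before the directory exists fails: {error}')

    out.mkdir()  # in the real flow, gdal2tiles has created the folder by now
    with open(out / 'extent.json', 'w') as f:
        json.dump(payload, f)
```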