diff --git a/CHANGELOG.md b/CHANGELOG.md index a828aa39..da49d308 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.8] +### Changed +- As an incremental improvement to deduplication performance, its-live-monitoring now: + - searches the `s3://its-live-data` bucket directly for already published (succeeded) pairs. + - searches HyP3 ITS_LIVE via the API for pairs still pending or running, instead of searching for all previously submitted pairs. +- Upgrade numpy from 1.26.4 to 2.1.3 ## [0.5.7] ### Fixed diff --git a/its_live_monitoring/src/main.py b/its_live_monitoring/src/main.py index ce15cf61..aedeb600 100644 --- a/its_live_monitoring/src/main.py +++ b/its_live_monitoring/src/main.py @@ -5,9 +5,13 @@ import logging import os import sys +from typing import Iterable +import boto3 +import botocore.config import geopandas as gpd import hyp3_sdk as sdk +import numpy as np import pandas as pd from landsat import ( @@ -35,11 +39,88 @@ log = logging.getLogger('its_live_monitoring') log.setLevel(os.environ.get('LOGGING_LEVEL', 'INFO')) +s3 = boto3.client( + 's3', + config=botocore.config.Config(signature_version=botocore.UNSIGNED), +) -def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """Ensure we don't submit duplicate jobs to HyP3. - Search HyP3 jobs since the reference scene's acquisition date and remove already processed pairs +def point_to_region(lat: float, lon: float) -> str: + """Returns a string (for example, N78W124) of a region name based on granule center point lat,lon.""" + nw_hemisphere = 'S' if np.signbit(lat) else 'N' + ew_hemisphere = 'W' if np.signbit(lon) else 'E' + + region_lat = int(np.abs(np.fix(lat / 10) * 10)) + if region_lat == 90: # if you are exactly at a pole, put in lat = 80 bin + region_lat = 80 + + region_lon = int(np.abs(np.fix(lon / 10) * 10)) + if region_lon >= 180: # if you are at the dateline, back off to the 170 bin + region_lon = 170 + + return f'{nw_hemisphere}{region_lat:02d}{ew_hemisphere}{region_lon:03d}' + + +def regions_from_bounds(min_lon: float, min_lat: float, max_lon: float, max_lat: float) -> set[str]: + """Returns a set of all region names within a bounding box.""" + lats, lons = np.mgrid[min_lat : max_lat + 10 : 10, min_lon : max_lon + 10 : 10] + return {point_to_region(lat, lon) for lat, lon in zip(lats.ravel(), lons.ravel())} + + +def get_key(tile_prefixes: Iterable[str], reference: str, secondary: str) -> str | None: + """Search S3 for the key of a processed pair. + + Args: + tile_prefixes: s3 tile path prefixes + reference: reference scene name + secondary: secondary scene name + + Returns: + The key or None if one wasn't found. + """ + # NOTE: hyp3-autorift enforces earliest scene as the reference scene and will write files accordingly, + # but its-live-monitoring uses the latest scene as the reference scene, so enforce autorift convention + reference, secondary = sorted([reference, secondary]) + + for tile_prefix in tile_prefixes: + prefix = f'{tile_prefix}/{reference}_X_{secondary}' + response = s3.list_objects_v2( + Bucket='its-live-data', + Prefix=prefix, + ) + for item in response.get('Contents', []): + if item['Key'].endswith('.nc'): + return item['Key'] + return None + + +def deduplicate_s3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Ensures that pairs aren't submitted if they already have a product in S3. + + Args: + pairs: A GeoDataFrame containing *at least* these columns: `reference`, `reference_acquisition`, and + `secondary`. + + Returns: + The pairs GeoDataFrame with any already submitted pairs removed. + """ + s2_prefix = 'velocity_image_pair/sentinel2/v02' + landsat_prefix = 'velocity_image_pair/landsatOLI/v02' + prefix = s2_prefix if pairs['reference'][0].startswith('S2') else landsat_prefix + + regions = regions_from_bounds(*pairs['geometry'].total_bounds) + tile_prefixes = [f'{prefix}/{region}' for region in regions] + + drop_indexes = [] + for idx, reference, secondary in pairs[['reference', 'secondary']].itertuples(): + if get_key(tile_prefixes=tile_prefixes, reference=reference, secondary=secondary): + drop_indexes.append(idx) + + return pairs.drop(index=drop_indexes) + + +def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + """Search HyP3 jobs since the reference scene's acquisition date and remove already submitted (in PENDING or RUNNING state) pairs. Args: pairs: A GeoDataFrame containing *at least* these columns: `reference`, `reference_acquisition`, and @@ -48,13 +129,24 @@ def deduplicate_hyp3_pairs(pairs: gpd.GeoDataFrame) -> gpd.GeoDataFrame: Returns: The pairs GeoDataFrame with any already submitted pairs removed. """ - jobs = HYP3.find_jobs( + pending_jobs = HYP3.find_jobs( job_type='AUTORIFT', start=pairs.iloc[0].reference_acquisition, name=pairs.iloc[0].reference, user_id=EARTHDATA_USERNAME, + status_code='PENDING', ) + running_jobs = HYP3.find_jobs( + job_type='AUTORIFT', + start=pairs.iloc[0].reference_acquisition, + name=pairs.iloc[0].reference, + user_id=EARTHDATA_USERNAME, + status_code='RUNNING', + ) + + jobs = pending_jobs + running_jobs + df = pd.DataFrame([job.job_parameters['granules'] for job in jobs], columns=['reference', 'secondary']) df = df.set_index(['reference', 'secondary']) pairs = pairs.set_index(['reference', 'secondary']) @@ -123,7 +215,13 @@ def process_scene( if len(pairs) > 0: pairs = deduplicate_hyp3_pairs(pairs) - log.info(f'Deduplicated pairs; {len(pairs)} remaining') + log.info(f'Deduplicated HyP3 running/pending pairs; {len(pairs)} remaining') + with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None): + log.debug(pairs.sort_values(by=['secondary'], ascending=False).loc[:, ['reference', 'secondary']]) + + pairs = deduplicate_s3_pairs(pairs) + + log.info(f'Deduplicated already published pairs; {len(pairs)} remaining') with pd.option_context('display.max_rows', None, 'display.max_columns', None, 'display.width', None): log.debug(pairs.sort_values(by=['secondary'], ascending=False).loc[:, ['reference', 'secondary']]) diff --git a/requirements-all.txt b/requirements-all.txt index 53168afe..f15dc5da 100644 --- a/requirements-all.txt +++ b/requirements-all.txt @@ -1,6 +1,6 @@ -r requirements-its_live_monitoring.txt -r requirements-status-messages.txt -cfn-lint==1.18.0 -ruff==0.6.9 +cfn-lint==1.19.0 +ruff==0.7.4 pytest==8.3.3 responses==0.25.3 diff --git a/requirements-its_live_monitoring.txt b/requirements-its_live_monitoring.txt index 74f83500..415fae97 100644 --- a/requirements-its_live_monitoring.txt +++ b/requirements-its_live_monitoring.txt @@ -1,6 +1,7 @@ geopandas==1.0.1 hyp3-sdk==7.0.1 pandas==2.2.3 -pystac-client==0.8.4 +pystac-client==0.8.5 requests==2.32.3 shapely==2.0.6 +numpy==2.1.3 diff --git a/tests/conftest.py b/tests/conftest.py index a40d2ba2..e52b911f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,6 +36,18 @@ def create_pystac_item( return create_pystac_item +@pytest.fixture +def stac_search_factory(): + class MockItemSearch: + def __init__(self, items: list[pystac.item.Item]): + self.items = items + + def pages(self): + return [self.items] + + return MockItemSearch + + @pytest.fixture def hyp3_job_factory(): def create_hyp3_job(granules: list) -> sdk.Job: diff --git a/tests/its_live_monitoring/test_landsat.py b/tests/its_live_monitoring/test_landsat.py index 7575a3dc..3c77aa1d 100644 --- a/tests/its_live_monitoring/test_landsat.py +++ b/tests/its_live_monitoring/test_landsat.py @@ -1,11 +1,12 @@ from copy import deepcopy from datetime import datetime -from unittest.mock import MagicMock, patch +from unittest.mock import patch import landsat -def test_get_landsat_stac_item(pystac_item_factory): +@patch('landsat.LANDSAT_COLLECTION.get_item') +def test_get_landsat_stac_item(mock_landsat_get_item, pystac_item_factory): scene = 'LC08_L1TP_138041_20240128_20240207_02_T1' properties = { 'instruments': ['OLI'], @@ -18,10 +19,8 @@ def test_get_landsat_stac_item(pystac_item_factory): collection = 'landsat-c2l1' expected_item = pystac_item_factory(id=scene, datetime=datetime.now(), properties=properties, collection=collection) - with patch('landsat.LANDSAT_COLLECTION', MagicMock()): - landsat.LANDSAT_COLLECTION.get_item.return_value = expected_item - item = landsat.get_landsat_stac_item(scene) - + mock_landsat_get_item.side_effect = [expected_item] + item = landsat.get_landsat_stac_item(scene) assert item.collection_id == collection assert item.properties == properties @@ -99,7 +98,8 @@ def test_qualifies_for_processing(pystac_item_factory): assert landsat.qualifies_for_landsat_processing(item) -def test_get_landsat_pairs_for_reference_scene(pystac_item_factory): +@patch('landsat.LANDSAT_CATALOG.search') +def test_get_landsat_pairs_for_reference_scene(mock_landsat_get_item, pystac_item_factory, stac_search_factory): properties = { 'instruments': ['OLI'], 'landsat:collection_category': 'T1', @@ -129,9 +129,8 @@ def test_get_landsat_pairs_for_reference_scene(pystac_item_factory): pystac_item_factory(id=scene, datetime=date_time, properties=properties, collection=collection) ) - with patch('landsat.LANDSAT_CATALOG', MagicMock()): - landsat.LANDSAT_CATALOG.search().pages.return_value = (sec_items,) - df = landsat.get_landsat_pairs_for_reference_scene(ref_item) + mock_landsat_get_item.side_effect = [stac_search_factory(sec_items)] + df = landsat.get_landsat_pairs_for_reference_scene(ref_item) assert (df['landsat:wrs_path'] == ref_item.properties['landsat:wrs_path']).all() assert (df['landsat:wrs_row'] == ref_item.properties['landsat:wrs_row']).all() @@ -140,7 +139,10 @@ def test_get_landsat_pairs_for_reference_scene(pystac_item_factory): assert (df['reference'] == ref_item.id).all() -def test_get_landsat_pairs_for_off_nadir_reference_scene(pystac_item_factory): +@patch('landsat.LANDSAT_CATALOG.search') +def test_get_landsat_pairs_for_off_nadir_reference_scene( + mock_landsat_get_item, pystac_item_factory, stac_search_factory +): properties = { 'instruments': ['OLI'], 'landsat:collection_category': 'T1', @@ -171,9 +173,8 @@ def test_get_landsat_pairs_for_off_nadir_reference_scene(pystac_item_factory): props['view:off_nadir'] = off_nadir sec_items.append(pystac_item_factory(id=scene, datetime=date_time, properties=props, collection=collection)) - with patch('landsat.LANDSAT_CATALOG', MagicMock()): - landsat.LANDSAT_CATALOG.search().pages.return_value = (sec_items,) - df = landsat.get_landsat_pairs_for_reference_scene(ref_item) + mock_landsat_get_item.side_effect = [stac_search_factory(sec_items)] + df = landsat.get_landsat_pairs_for_reference_scene(ref_item) assert (df['view:off_nadir'] > 0).all() diff --git a/tests/its_live_monitoring/test_main.py b/tests/its_live_monitoring/test_main.py index ed89cc2d..7cbc07b2 100644 --- a/tests/its_live_monitoring/test_main.py +++ b/tests/its_live_monitoring/test_main.py @@ -1,12 +1,68 @@ -from unittest.mock import MagicMock, patch +from unittest.mock import patch import geopandas as gpd import hyp3_sdk as sdk +from shapely import Polygon import main -def test_deduplicate_hyp3_pairs(hyp3_batch_factory): +def test_point_to_region(): + assert main.point_to_region(63.0, 128.0) == 'N60E120' + assert main.point_to_region(-63.0, 128.0) == 'S60E120' + assert main.point_to_region(63.0, -128.0) == 'N60W120' + assert main.point_to_region(-63.0, -128.0) == 'S60W120' + assert main.point_to_region(0.0, 128.0) == 'N00E120' + assert main.point_to_region(0.0, -128.0) == 'N00W120' + assert main.point_to_region(63.0, 0.0) == 'N60E000' + assert main.point_to_region(-63.0, 0.0) == 'S60E000' + assert main.point_to_region(0.0, 0.0) == 'N00E000' + # particularly weird edge cases which can arise if you round the point before passing it in + assert main.point_to_region(-0.0, 0.0) == 'S00E000' + assert main.point_to_region(-0.0, -0.0) == 'S00W000' + assert main.point_to_region(0.0, -0.0) == 'N00W000' + + +def test_regions_from_bounds(): + assert main.regions_from_bounds(-128.0, -63.0, -109.0, -54.0) == { + 'S60W120', + 'S60W110', + 'S60W100', + 'S50W120', + 'S50W110', + 'S50W100', + } + assert main.regions_from_bounds(-5.0, -5.0, 5.0, 5.0) == {'S00W000', 'S00E000', 'N00W000', 'N00E000'} + assert main.regions_from_bounds(104.0, 53.0, 123.0, 61.0) == { + 'N60E120', + 'N60E110', + 'N60E100', + 'N50E120', + 'N50E110', + 'N50E100', + } + assert main.regions_from_bounds(-128.0, -63.0, -128.0, -63.0) == {'S60W120'} + + +@patch('main.s3.list_objects_v2') +def test_get_key(mock_list_objects_v2): + mock_list_objects_v2.side_effect = [ + {'Contents': []}, + { + 'Contents': [ + {'Key': 'foo'}, + {'Key': 'bar'}, + {'Key': 'N00E010/earliest_X_latest_G0120V02_P000.nc'}, + {'Key': 'fizz'}, + ] + }, + ] + + assert main.get_key(['N00E000', 'N00E010'], 'latest', 'earliest') == 'N00E010/earliest_X_latest_G0120V02_P000.nc' + + +@patch('main.HYP3.find_jobs') +def test_deduplicate_hyp3_pairs(mock_find_jobs, hyp3_batch_factory): sec_scenes = [ 'LC09_L1TP_138041_20240120_20240120_02_T1', 'LC08_L1TP_138041_20240112_20240123_02_T1', @@ -19,25 +75,54 @@ def test_deduplicate_hyp3_pairs(hyp3_batch_factory): {'reference': ref_scenes, 'secondary': sec_scenes, 'reference_acquisition': ref_acquisitions} ) - with patch('main.HYP3.find_jobs', MagicMock(return_value=sdk.Batch())): - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - + mock_find_jobs.side_effect = [sdk.Batch(), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert pairs.equals(landsat_pairs) - landsat_jobs = hyp3_batch_factory(zip(ref_scenes, sec_scenes)) - with patch('main.HYP3.find_jobs', MagicMock(return_value=landsat_jobs)): - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - + mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes, sec_scenes)), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 0 - landsat_jobs = hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])) - with patch('main.HYP3.find_jobs', MagicMock(return_value=landsat_jobs)): - pairs = main.deduplicate_hyp3_pairs(landsat_pairs) - + mock_find_jobs.side_effect = [hyp3_batch_factory(zip(ref_scenes[:-1], sec_scenes[:-1])), sdk.Batch()] + pairs = main.deduplicate_hyp3_pairs(landsat_pairs) assert len(pairs) == 1 -def test_submit_pairs_for_processing(hyp3_batch_factory): +@patch('main.get_key') +def test_deduplicate_s3_pairs(mock_get_key): + sec_scenes = [ + 'LC09_L1TP_138041_20240120_20240120_02_T1', + 'LC08_L1TP_138041_20240112_20240123_02_T1', + 'LC09_L1TP_138041_20240104_20240104_02_T1', + ] + ref_scenes = ['LC08_L1TP_138041_20240128_20240207_02_T1'] * 3 + ref_acquisitions = ['2024-01-28T04:29:49.361022Z'] * 3 + geometries = [Polygon.from_bounds(0, 0, 1, 1)] * 3 + + landsat_pairs = gpd.GeoDataFrame( + { + 'reference': ref_scenes, + 'secondary': sec_scenes, + 'reference_acquisition': ref_acquisitions, + 'geometry': geometries, + } + ) + + mock_get_key.side_effect = [None, None, None] + pairs = main.deduplicate_s3_pairs(landsat_pairs) + assert pairs.equals(landsat_pairs) + + mock_get_key.side_effect = [None, 'foo', None] + pairs = main.deduplicate_s3_pairs(landsat_pairs) + assert pairs.equals(landsat_pairs.drop(1)) + + mock_get_key.side_effect = ['foo', 'bar', 'bazz'] + pairs = main.deduplicate_s3_pairs(landsat_pairs) + assert pairs.equals(landsat_pairs.drop(0).drop(1).drop(2)) + + +@patch('main.HYP3.submit_prepared_jobs') +def test_submit_pairs_for_processing(mock_submit_prepared_jobs, hyp3_batch_factory): sec_scenes = [ 'LC09_L1TP_138041_20240120_20240120_02_T1', 'LC08_L1TP_138041_20240112_20240123_02_T1', @@ -48,7 +133,6 @@ def test_submit_pairs_for_processing(hyp3_batch_factory): landsat_jobs = hyp3_batch_factory(zip(ref_scenes, sec_scenes)) landsat_pairs = gpd.GeoDataFrame({'reference': ref_scenes, 'secondary': sec_scenes}) - with patch('main.HYP3.submit_prepared_jobs', MagicMock(return_value=landsat_jobs)): - jobs = main.submit_pairs_for_processing(landsat_pairs) - + mock_submit_prepared_jobs.side_effect = [landsat_jobs] + jobs = main.submit_pairs_for_processing(landsat_pairs) assert jobs == landsat_jobs diff --git a/tests/its_live_monitoring/test_sentinel2.py b/tests/its_live_monitoring/test_sentinel2.py index 35165d26..91e1b146 100644 --- a/tests/its_live_monitoring/test_sentinel2.py +++ b/tests/its_live_monitoring/test_sentinel2.py @@ -1,6 +1,6 @@ from copy import deepcopy from datetime import datetime -from unittest.mock import MagicMock, patch +from unittest.mock import patch import pystac import pytest @@ -26,7 +26,8 @@ def test_raise_for_missing_in_google_cloud(): sentinel2.raise_for_missing_in_google_cloud(missing_scene) -def test_get_sentinel2_stac_item(pystac_item_factory): +@patch('sentinel2.SENTINEL2_CATALOG.search') +def test_get_sentinel2_stac_item(mock_sentinel2_search, pystac_item_factory, stac_search_factory): scene = 'S2B_13CES_20200315_0_L1C' properties = { 'grid:code': 'MGRS-13CES', @@ -39,27 +40,18 @@ def test_get_sentinel2_stac_item(pystac_item_factory): date_time = '2020-03-15T15:22:59.024Z' expected_item = pystac_item_factory(id=scene, datetime=date_time, properties=properties, collection=collection) - class MockItemSearch: - def __init__(self, item: pystac.item.Item): - self.items = [item] if item else [] - - def pages(self): - return [self.items] - - with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): - sentinel2.SENTINEL2_CATALOG.search.return_value = MockItemSearch(expected_item) - item = sentinel2.get_sentinel2_stac_item(scene) - + mock_sentinel2_search.side_effect = [stac_search_factory([expected_item])] + item = sentinel2.get_sentinel2_stac_item(scene) assert item.collection_id == collection assert item.properties == properties - with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): - sentinel2.SENTINEL2_CATALOG.search.return_value = MockItemSearch(None) - with pytest.raises(ValueError): - item = sentinel2.get_sentinel2_stac_item(scene) + mock_sentinel2_search.side_effect = [stac_search_factory([])] + with pytest.raises(ValueError): + _ = sentinel2.get_sentinel2_stac_item(scene) -def test_qualifies_for_processing(pystac_item_factory): +@patch('sentinel2.get_data_coverage_for_item') +def test_qualifies_for_processing(mock_data_coverage_for_item, pystac_item_factory): properties = { 'grid:code': 'MGRS-19DEE', 'eo:cloud_cover': 30, @@ -72,63 +64,80 @@ def test_qualifies_for_processing(pystac_item_factory): id='XXX_XXXL1C_XXXX_XXXX_XXXX', datetime=datetime.now(), properties=properties, collection=collection ) - with patch('sentinel2.get_data_coverage_for_item', (lambda x: 75.0)): - assert sentinel2.qualifies_for_sentinel2_processing(good_item) - - item = deepcopy(good_item) - item.collection_id = 'foo' - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['s2:product_type'] = 'S2MSI2A' - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['instruments'] = ['mis'] - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['grid:code'] = 'MGRS-30BZZ' - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - del item.properties['eo:cloud_cover'] - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = -1 - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = 0 - assert sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = 1 - assert sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - 1 - assert sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - assert sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 - assert not sentinel2.qualifies_for_sentinel2_processing(item) - - item = deepcopy(good_item) - assert sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R110') - - assert not sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R100') - - with patch('sentinel2.get_data_coverage_for_item', (lambda x: 50.0)): - assert not sentinel2.qualifies_for_sentinel2_processing(good_item) - - -def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): + mock_data_coverage_for_item.side_effect = [75.0] + assert sentinel2.qualifies_for_sentinel2_processing(good_item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.collection_id = 'foo' + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['s2:product_type'] = 'S2MSI2A' + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['instruments'] = ['mis'] + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['grid:code'] = 'MGRS-30BZZ' + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + del item.properties['eo:cloud_cover'] + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = -1 + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = 0 + assert sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = 1 + assert sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT - 1 + assert sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + assert sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + item.properties['eo:cloud_cover'] = sentinel2.SENTINEL2_MAX_CLOUD_COVER_PERCENT + 1 + assert not sentinel2.qualifies_for_sentinel2_processing(item) + + mock_data_coverage_for_item.side_effect = [75.0] + item = deepcopy(good_item) + assert sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R110') + + mock_data_coverage_for_item.side_effect = [75.0] + assert not sentinel2.qualifies_for_sentinel2_processing(item, relative_orbit='R100') + + mock_data_coverage_for_item.side_effect = [50.0] + assert not sentinel2.qualifies_for_sentinel2_processing(good_item) + + +@patch('sentinel2.SENTINEL2_CATALOG.search') +@patch('sentinel2.get_data_coverage_for_item') +def test_get_sentinel2_pairs_for_reference_scene( + mock_data_coverage_for_item, mock_sentinel2_search, pystac_item_factory, stac_search_factory +): scene = 'S2B_22TCR_20240528_0_L1C' properties = { 'eo:cloud_cover': 28.1884, @@ -192,10 +201,9 @@ def test_get_sentinel2_pairs_for_reference_scene(pystac_item_factory): props = deepcopy(properties) sec_items.append(pystac_item_factory(id=scene, datetime=date_time, properties=props, collection=collection)) - with patch('sentinel2.SENTINEL2_CATALOG', MagicMock()): - sentinel2.SENTINEL2_CATALOG.search().pages.return_value = (sec_items,) - with patch('sentinel2.get_data_coverage_for_item', (lambda x: 75.0)): - df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) + mock_sentinel2_search.side_effect = [stac_search_factory(sec_items)] + mock_data_coverage_for_item.side_effect = [75.0, 75.0, 75.0] + df = sentinel2.get_sentinel2_pairs_for_reference_scene(ref_item) assert (df['grid:code'] == ref_item.properties['grid:code']).all() for instrument in df['instruments']: