
Release/1.4.0 #231

Merged: 56 commits, Sep 25, 2024
Changes from 53 commits
Commits
934cbaf
/version 1.3.0a0
nikki-t Apr 22, 2024
21e0438
Update build.yml
frankinspace Apr 30, 2024
7f2f8b7
/version 1.3.0a1
frankinspace Apr 30, 2024
28896ee
Merge remote-tracking branch 'origin/main' into develop
frankinspace Apr 30, 2024
bc91a3d
/version 1.3.0a2
frankinspace Apr 30, 2024
c896a0a
Feature/issue 175 - Update docs to point to OPS (#176)
torimcd May 6, 2024
cf7199a
Dependency update to fix snyk scan
frankinspace May 6, 2024
2bf4864
issues/101: Support for HTTP Accept header (#172)
nikki-t May 8, 2024
232a460
/version 1.3.0a3
frankinspace May 8, 2024
fd57373
issues/102: Support compression of API response (#173)
nikki-t May 8, 2024
e4bacfb
/version 1.3.0a4
frankinspace May 8, 2024
46f65f1
Feature/issue 100 Add option to 'compact' GeoJSON result into single …
nikki-t May 14, 2024
ab4ffb7
/version 1.3.0a5
frankinspace May 14, 2024
5f3b184
Feature/issue 183 (#185)
nikki-t Jun 3, 2024
3ff1733
/version 1.3.0a6
nikki-t Jun 3, 2024
5d86e7a
Feature/issue 186 Implement API keys (#188)
nikki-t Jun 18, 2024
e6063c7
/version 1.3.0a7
frankinspace Jun 18, 2024
00b7389
Update changelog for 1.3.0 release
nikki-t Jun 18, 2024
8f9b3d0
/version 1.4.0a0
nikki-t Jun 18, 2024
99044f1
Feature/issue 198 (#207)
nikki-t Jul 31, 2024
7c53555
/version 1.4.0a1
nikki-t Jul 31, 2024
025fd2a
Add doi to documentation pages (#216)
cassienickles Aug 1, 2024
4661966
/version 1.4.0a2
cassienickles Aug 1, 2024
07ead89
issue-193: Add Dynamo DB Table for SWOT Prior Lakes (#209)
torimcd Aug 1, 2024
065b1a6
/version 1.4.0a3
frankinspace Aug 1, 2024
ed002d2
Feature/issue 201 Create a table for tracking granule ingest status (…
nikki-t Aug 1, 2024
5636520
/version 1.4.0a4
frankinspace Aug 1, 2024
279f7bb
Feature/issue 210 - Load large geometry polygons (#219)
torimcd Aug 14, 2024
5468664
/version 1.4.0a5
frankinspace Aug 14, 2024
70613e5
Feature/issue 222 - Add granule info to track ingest table on load (#…
torimcd Aug 19, 2024
62a0e14
/version 1.4.0a6
nikki-t Aug 19, 2024
688bf5a
Feature/issue-225: Create one track ingest table per feature type (#226)
torimcd Aug 22, 2024
d7818e2
/version 1.4.0a7
frankinspace Aug 22, 2024
7d46c4e
Feature/issue 196 Add new feature type to query the API for lake data…
nikki-t Aug 22, 2024
6b31e36
/version 1.4.0a8
frankinspace Aug 22, 2024
865809e
Feature/issue 205 - Add Confluence API key (#221)
nikki-t Aug 22, 2024
a8fe411
/version 1.4.0a9
frankinspace Aug 22, 2024
aae660f
/version 1.4.0a10
podaac-cicd[bot] Aug 22, 2024
a182ff1
changelog for 1.4.0 release
torimcd Aug 26, 2024
61893cf
update dependencies for 1.4.0 release
torimcd Aug 26, 2024
e93b428
fix CMR query in UAT
torimcd Aug 28, 2024
957a432
/version 1.4.0rc1
torimcd Aug 28, 2024
856d21c
fix typo in load_data lambda
torimcd Aug 28, 2024
cbcfa1e
Merge branch 'release/1.4.0' of https://github.com/podaac/hydrocron i…
torimcd Aug 28, 2024
368bbfb
/version 1.4.0rc2
torimcd Aug 28, 2024
23336d9
fix index on rev date in load data lambda
torimcd Sep 11, 2024
75bfdce
Merge branch 'release/1.4.0' of https://github.com/podaac/hydrocron i…
torimcd Sep 11, 2024
07dd50a
update dependencies
torimcd Sep 11, 2024
7eab9c6
lint readme
torimcd Sep 11, 2024
cd4d470
/version 1.4.0rc3
torimcd Sep 11, 2024
6c3d2fd
Merge branch 'main' into release/1.4.0
torimcd Sep 12, 2024
88c896a
Merge branch 'release/1.4.0' of https://github.com/podaac/hydrocron i…
torimcd Sep 12, 2024
5a5e8b8
/version 1.4.0rc4
torimcd Sep 12, 2024
28f0433
fix cmr env search by venue
torimcd Sep 17, 2024
1bee196
Merge branch 'release/1.4.0' of https://github.com/podaac/hydrocron i…
torimcd Sep 17, 2024
9c29fa4
/version 1.4.0rc5
torimcd Sep 17, 2024
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
@@ -36,7 +36,7 @@ concurrency:
env:
  POETRY_VERSION: "1.7.1"
  PYTHON_VERSION: "3.10"
-  TERRAFORM_VERSION: "1.7.3"
+  TERRAFORM_VERSION: "1.9.3"
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

17 changes: 17 additions & 0 deletions CHANGELOG.md
@@ -8,9 +8,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added
### Changed
### Deprecated
### Removed
### Fixed
### Security

## [1.4.0]

### Added
- Issue 205 - Define an API key for the Confluence workflow and usage plan limits
- Issue 201 - Create table for tracking granule ingest status
- Issue 225 - Create one track ingest table per feature type
- Issue 222 - Add operations to load granule Lambda to write granule record to track ingest database
- Issue 198 - Implement track ingest lambda function CMR and Hydrocron queries
- Issue 193 - Add new Dynamo table for prior lake data
- Issue 196 - Add new feature type to query the API for lake data
### Changed
### Deprecated
### Removed
### Fixed
- Issue 210 - Features with large geometries cannot be loaded
### Security

## [1.3.0]
10 changes: 9 additions & 1 deletion README.md
@@ -1,11 +1,13 @@
## Overview

Hydrocron is an API that gives hydrologists direct access to filtered data
from our newest satellites. It provides a simple way to filter data by feature ID,
date range, polygonal area, and more. Results are returned in formats such
as CSV and GeoJSON.

## Requirements

Python 3.10+

## Running Locally with Docker
@@ -18,17 +18,22 @@ Python 3.10+
### 1. Build or Pull Hydrocron Docker

Build the docker container:

```bash
docker build . -f docker/Dockerfile -t hydrocron:latest
```

-Pull a pre-built image from https://github.com/podaac/hydrocron/pkgs/container/hydrocron:
+Pull a pre-built image from [https://github.com/podaac/hydrocron/pkgs/container/hydrocron](https://github.com/podaac/hydrocron/pkgs/container/hydrocron):

```bash
docker pull ghcr.io/podaac/hydrocron:latest
```

### 2. Run Docker Compose

Launch DynamoDB Local on port 8000 and Hydrocron on port 9000:

```bash
docker-compose up
```
Expand All @@ -42,6 +49,7 @@ poetry install
```

This will load the data in `test/data` into the local DynamoDB instance.

```bash
python tests/load_data_local.py
```
70 changes: 70 additions & 0 deletions docs/examples.md
@@ -270,6 +270,56 @@ Will return GeoJSON:
}
```

## Get time series GeoJSON for a lake

Search for a single lake by ID.

[https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=PriorLake&feature_id=6350036102&start_time=2024-07-20T00:00:00Z&end_time=2024-07-26T00:00:00Z&fields=lake_id,time_str,wse,area_total,quality_f,collection_shortname,crid,PLD_version,range_start_time&output=geojson](https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=PriorLake&feature_id=6350036102&start_time=2024-07-20T00:00:00Z&end_time=2024-07-26T00:00:00Z&fields=lake_id,time_str,wse,area_total,quality_f,collection_shortname,crid,PLD_version,range_start_time&output=geojson)

Will return GeoJSON:

```json
{
"status": "200 OK",
"time": 391.613,
"hits": 1,
"results": {
"csv": "",
"geojson": {
"type": "FeatureCollection",
"features": [
{
"id": "0",
"type": "Feature",
"properties": {
"lake_id": "6350036102",
"time_str": "2024-07-25T22:48:23Z",
"wse": "260.802",
"area_total": "0.553409",
"quality_f": "1",
"collection_shortname": "SWOT_L2_HR_LakeSP_2.0",
"crid": "PIC0",
"PLD_version": "105",
"range_start_time": "2024-07-25T22:47:27Z",
"wse_units": "m",
"area_total_units": "km^2"
},
"geometry": {
"type": "Point",
"coordinates": [
-42.590727027987064,
-19.822613018107482
]
}
}
]
}
}
}
```

**NOTE:** Due to the size of the original polygon in the lake (L2_HR_LakeSP) shapefiles, we return only the calculated center point of the lake. This facilitates conformance with the GeoJSON specification; center points should not be considered accurate.
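
For illustration (not part of this changeset), a minimal sketch of issuing the lake query above from Python, assuming the `requests` package is installed:

```python
import requests

# Endpoint and parameters taken from the example URL above
url = "https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries"
params = {
    "feature": "PriorLake",
    "feature_id": "6350036102",
    "start_time": "2024-07-20T00:00:00Z",
    "end_time": "2024-07-26T00:00:00Z",
    "fields": "lake_id,time_str,wse,area_total",
    "output": "geojson",
}

response = requests.get(url, params=params, timeout=30)
response.raise_for_status()

# The FeatureCollection is nested under results -> geojson
for feature in response.json()["results"]["geojson"]["features"]:
    props = feature["properties"]
    print(props["time_str"], props["wse"], props["area_total"])
```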

## Get time series CSV for river reach

Search for a single river reach by ID.
@@ -310,6 +360,26 @@ Will return CSV:
}
```

## Get time series CSV for a lake

Search for a single lake by ID.

[https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=PriorLake&feature_id=6350036102&start_time=2024-07-20T00:00:00Z&end_time=2024-07-26T00:00:00Z&fields=lake_id,time_str,wse,area_total,quality_f,collection_shortname,crid,PLD_version,range_start_time&output=csv](https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries?feature=PriorLake&feature_id=6350036102&start_time=2024-07-20T00:00:00Z&end_time=2024-07-26T00:00:00Z&fields=lake_id,time_str,wse,area_total,quality_f,collection_shortname,crid,PLD_version,range_start_time&output=csv)

Will return CSV:

```json
{
"status": "200 OK",
"time": 321.592,
"hits": 1,
"results": {
"csv": "lake_id,time_str,wse,area_total,quality_f,collection_shortname,crid,PLD_version,range_start_time,wse_units,area_total_units\n6350036102,2024-07-25T22:48:23Z,260.802,0.553409,1,SWOT_L2_HR_LakeSP_2.0,PIC0,105,2024-07-25T22:47:27Z,m,km^2\n",
"geojson": {}
}
}
```

## Accept headers

See the [documentation on the timeseries endpoint](timeseries.md) for an explanation of Accept headers.
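
As a sketch of what that looks like in practice (assuming the `requests` package; the timeseries documentation is authoritative), a client can negotiate CSV output through the Accept header instead of the `output` parameter:

```python
import requests

url = "https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries"
params = {
    "feature": "Reach",
    "feature_id": "78340600051",
    "start_time": "2024-07-20T00:00:00Z",
    "end_time": "2024-07-26T00:00:00Z",
    "fields": "reach_id,time_str,wse",
}

# Ask for CSV via content negotiation rather than the output parameter
response = requests.get(url, params=params, headers={"Accept": "text/csv"}, timeout=30)
print(response.text)
```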
3 changes: 3 additions & 0 deletions docs/intro.md
@@ -1,5 +1,7 @@
# Hydrocron Documentation

[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11176233.svg)](https://doi.org/10.5281/zenodo.11176233)

Hydrocron is an API that repackages hydrology datasets from the Surface Water and Ocean Topography (SWOT) satellite into formats that make time-series analysis easier.

SWOT data is archived as individually timestamped shapefiles, which would otherwise require users to perform potentially thousands of file IO operations per river feature to view the data as a timeseries. Hydrocron makes this possible with a single API call.
@@ -9,3 +11,4 @@ Original SWOT data is archived at NASA's [Physical Oceanography Distributed Acti
Datasets included in Hydrocron:

- [SWOT Level 2 River Single-Pass Vector Data Product, Version 2.0](https://podaac.jpl.nasa.gov/dataset/SWOT_L2_HR_RiverSP_2.0)
- [SWOT Level 2 Lake Single-Pass Vector Data Product, Version 2.0](https://podaac.jpl.nasa.gov/dataset/SWOT_L2_HR_LakeSP_2.0)
17 changes: 16 additions & 1 deletion docs/overview.md
@@ -11,9 +11,24 @@ The main timeseries endpoint allows users to search by feature ID.
River reach and node ID numbers are defined in the [SWOT River Database (SWORD)](https://doi.org/10.1029/2021WR030054),
and can be browsed using the [SWORD Explorer Interactive Dashboard](https://www.swordexplorer.com/).

Lake ID numbers are defined in the PLD (Prior Lake Database) and can be found in the SWOT shapefiles; see the [SWOT Product Description Document for the L2_HR_LakeSP Dataset](https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about) for more information on lake identifiers.

SWOT may observe lakes and rivers that do not have an ID in the prior databases. In those cases, hydrology features are added to the Unassigned Lakes data product.
Hydrocron does not currently support Unassigned rivers and lakes.

Hydrocron currently includes data from these datasets:

- Reach and node shapefiles from the Level 2 KaRIn high rate river single pass vector product (L2_HR_RiverSP)
- PLD-oriented shapefiles from the Level 2 KaRIn high rate lake single pass vector product (L2_HR_LakeSP)

See this PO.DAAC [page](https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about) for more information on SWOT datasets.

## Limitations

-Data return size is limited to 6 MB. If your query response is larger than this a 413 error will be returned.
+Data return size is limited to **6 MB**. If your query response is larger than this, a 413 error will be returned.

**For Lake data:** Due to the size of the original polygon in the lake (L2_HR_LakeSP) shapefiles, we return only the calculated center point of the lake. This facilitates conformance with the GeoJSON specification; center points should not be considered accurate.
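
One client-side way to stay under the 6 MB cap, sketched here as an assumption about usage rather than documented service behavior (and assuming the `requests` package), is to split a long time range into shorter windows and concatenate the results:

```python
from datetime import datetime, timedelta

import requests

URL = "https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries"

def fetch_in_windows(feature, feature_id, fields, start, end, days=30):
    """Query Hydrocron in fixed-size windows so each response stays small."""
    chunks = []
    cursor = start
    while cursor < end:
        window_end = min(cursor + timedelta(days=days), end)
        response = requests.get(URL, params={
            "feature": feature,
            "feature_id": feature_id,
            "fields": fields,
            "start_time": cursor.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "end_time": window_end.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "output": "csv",
        }, timeout=30)
        if response.status_code == 413:
            raise RuntimeError("Window still too large; retry with fewer days")
        chunks.append(response.json()["results"]["csv"])
        cursor = window_end
    return chunks

csv_chunks = fetch_in_windows(
    "Reach", "78340600051", "reach_id,time_str,wse",
    datetime(2024, 1, 1), datetime(2024, 7, 1),
)
```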

## Citation

Cite Hydrocron using the following DOI: [10.5281/zenodo.11176233](https://doi.org/10.5281/zenodo.11176233).
32 changes: 23 additions & 9 deletions docs/timeseries.md
@@ -85,16 +85,18 @@ Content-Type: text/csv

### feature : string, required: yes

-Type of feature being requested. Either: "Reach" or "Node"
+Type of feature being requested. Either: "Reach", "Node", or "PriorLake"

### feature_id : string, required: yes

ID of the feature to retrieve

- Reaches have the format CBBBBBRRRRT (e.g., 78340600051)
- Nodes have the format CBBBBBRRRRNNNT (e.g., 12228200110861)
- PriorLakes have the format CBBNNNNNNT (e.g., 2710046612)

-Please see the [SWOT Product Description Document for the L2_HR_RiverSP Dataset](https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about) for more information on identifiers.
+Please see the [SWOT Product Description Document for the L2_HR_RiverSP Dataset](https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about) for more information on reach and node identifiers.
+Please see the [SWOT Product Description Document for the L2_HR_LakeSP Dataset](https://podaac.jpl.nasa.gov/SWOT?tab=datasets-information&sections=about) for more information on lake identifiers.
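
To make the ID layout concrete, a small sketch that splits a reach ID into its positional segments (positions follow the CBBBBBRRRRT pattern listed above; the meaning of each segment is defined in the product description documents):

```python
reach_id = "78340600051"      # CBBBBBRRRRT, 11 digits

continent = reach_id[0]       # C -> "7"
basin = reach_id[1:6]         # BBBBB -> "83406"
reach = reach_id[6:10]        # RRRR -> "0005"
feature_type = reach_id[10]   # T -> "1"
```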

### start_time : string, required: yes

@@ -136,7 +138,7 @@ The SWOT data fields to return in the request.

This is specified in the form of a comma separated list (without any spaces): `fields=reach_id,time_str,wse,slope`

-Hydrocron includes additional fields beyond the source data shapefile attributes, including units fields on measurements, cycle and pass information, and SWORD and collection versions. **NOTE: Units are always returned for fields that have corresponding units stored in Hydrocron, they do not need to be requested.** The complete list of input fields that are available through Hydrocron are below:
+Hydrocron includes additional fields beyond the source data shapefile attributes, including units fields on measurements, cycle and pass information, SWORD and PLD (the prior river and lake databases) versions, and collection versions. **NOTE: Units are always returned for fields that have corresponding units stored in Hydrocron; they do not need to be requested.** The complete list of input fields available through Hydrocron is below:

**Reach data fields**

@@ -196,6 +198,21 @@ Hydrocron includes additional fields beyond the source data shapefile attributes
'crid', 'geometry', 'sword_version', 'collection_shortname'
```

**Lake data fields**

```bash
'lake_id', 'reach_id', 'obs_id', 'overlap', 'n_overlap',
'time', 'time_tai', 'time_str', 'wse', 'wse_u', 'wse_r_u', 'wse_std',
'area_total', 'area_tot_u', 'area_detct', 'area_det_u',
'layovr_val', 'xtrk_dist', 'ds1_l', 'ds1_l_u', 'ds1_q', 'ds1_q_u',
'ds2_l', 'ds2_l_u', 'ds2_q', 'ds2_q_u',
'quality_f', 'dark_frac', 'ice_clim_f', 'ice_dyn_f', 'partial_f',
'xovr_cal_q', 'geoid_hght', 'solid_tide', 'load_tidef', 'load_tideg', 'pole_tide',
'dry_trop_c', 'wet_trop_c', 'iono_c', 'xovr_cal_c', 'lake_name', 'p_res_id',
'p_lon', 'p_lat', 'p_ref_wse', 'p_ref_area', 'p_date_t0', 'p_ds_t0', 'p_storage',
'cycle_id', 'pass_id', 'continent_id', 'range_start_time', 'range_end_time',
'crid', 'geometry', 'PLD_version', 'collection_shortname'
```

## Response Format

### Default
@@ -444,16 +461,13 @@ Example CSV response:

*The 400 code is also currently returned for queries where no time series data could be located for the requested feature ID. The message returned with the response indicates this; it can help to adjust the date range you are searching.

-## API Keys [DRAFT]
-
-> ⚠️
->API keys not yet implemented but coming soon! Content below is not finalized. More details to follow...
+## API Keys

-Users may request a special API key for cases where their intended usage of the API may be considered heavy or more complex. Heavy usage can be defined as continued used with over x requests per day or continue use which require many requests per second or concurrent requests. To request an API key or to discuss your use case, please contact us at x.
+Users may request a special API key for cases where their intended usage of the API may be considered heavy or more complex. Heavy usage can be defined as continued use with many requests per hour or day, or use that requires many requests per second or concurrent requests. To request an API key or to discuss your use case, please submit a [GitHub issue](https://github.com/podaac/hydrocron/issues).

**Note: Users do *not* have to send an API key in their request to use the Hydrocron API. The API key is optional.**

-### How to use an API key in requests [DRAFT]
+### How to use an API key in requests

Hydrocron API key header: `x-hydrocron-key`
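
A minimal sketch of sending the key with a request (assuming the `requests` package; the key value is a placeholder for the key issued to you):

```python
import requests

url = "https://soto.podaac.earthdatacloud.nasa.gov/hydrocron/v1/timeseries"
headers = {"x-hydrocron-key": "YOUR-API-KEY"}  # placeholder value
params = {
    "feature": "Reach",
    "feature_id": "78340600051",
    "start_time": "2024-07-20T00:00:00Z",
    "end_time": "2024-07-26T00:00:00Z",
    "fields": "reach_id,time_str,wse",
    "output": "csv",
}

response = requests.get(url, params=params, headers=headers, timeout=30)
print(response.status_code)
```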

5 changes: 3 additions & 2 deletions hydrocron/api/controllers/authorizer.py
@@ -26,9 +26,10 @@ def authorization_handler(event, context):
    logging.info("Context: %s", context)

    api_key_trusted = "" if "x-hydrocron-key" not in event["headers"].keys() else event["headers"]["x-hydrocron-key"]
+    trusted_key_list = json.loads(STORED_API_KEY_TRUSTED)

-    if api_key_trusted and api_key_trusted == STORED_API_KEY_TRUSTED:
-        response_policy = create_policy("trusted_partner", "Allow", event["methodArn"], STORED_API_KEY_TRUSTED)
+    if api_key_trusted and api_key_trusted in trusted_key_list:
+        response_policy = create_policy("trusted_partner", "Allow", event["methodArn"], api_key_trusted)
        logging.info("Created policy for trusted partner.")

    else:
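
The change above swaps a single stored key for a JSON-encoded list, so any key in the list is accepted. A self-contained sketch of that membership check, with a made-up stored value:

```python
import json

# Hypothetical stored secret: a JSON-encoded list of trusted keys
STORED_API_KEY_TRUSTED = '["key-alpha", "key-beta"]'

trusted_key_list = json.loads(STORED_API_KEY_TRUSTED)

print("key-alpha" in trusted_key_list)   # True: an issued key is accepted
print("key-gamma" in trusted_key_list)   # False: falls through to the default policy
```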
17 changes: 6 additions & 11 deletions hydrocron/api/controllers/timeseries.py
Expand Up @@ -129,8 +129,8 @@ def validate_parameters(parameters):

    error_message = ''

-    if parameters['feature'] not in ('Node', 'Reach'):
-        error_message = f'400: feature parameter should be Reach or Node, not: {parameters["feature"]}'
+    if parameters['feature'] not in ('Node', 'Reach', 'PriorLake'):
+        error_message = f'400: feature parameter should be Reach, Node, or PriorLake, not: {parameters["feature"]}'

    elif not parameters['feature_id'].isdigit():
        error_message = f'400: feature_id cannot contain letters: {parameters["feature_id"]}'
@@ -189,6 +189,8 @@ def is_fields_valid(feature, fields):
        columns = constants.REACH_ALL_COLUMNS
    elif feature == 'Node':
        columns = constants.NODE_ALL_COLUMNS
+    elif feature == 'PriorLake':
+        columns = constants.PRIOR_LAKE_ALL_COLUMNS
    else:
        columns = []
    return all(field in columns for field in fields)
@@ -241,6 +243,8 @@ def timeseries_get(feature, feature_id, start_time, end_time, output, fields):
        results = data_repository.get_reach_series_by_feature_id(feature_id, start_time, end_time)
    if feature.lower() == 'node':
        results = data_repository.get_node_series_by_feature_id(feature_id, start_time, end_time)
+    if feature.lower() == 'priorlake':
+        results = data_repository.get_prior_lake_series_by_feature_id(feature_id, start_time, end_time)

    if len(results['Items']) == 0:
        data['http_code'] = '400 Bad Request'
@@ -343,15 +347,6 @@ def add_units(gdf, columns):
def get_response(results, hits, elapsed, return_type, output, compact):
    """Create and return HTTP response based on results.

-    :param results: Dictionary of SWOT timeseries results
-    :type results: dict
-    :param hits: Number of results returned from query
-    :type hits: int
-    :param elapsed: Number of seconds it took to query for results
-    :type elapsed: float
-    :param return_type: Accept request header
-    :type return_type: str
-    :param output: Output to return in request
    :param results: Dictionary of SWOT timeseries results
    :type results: dict
    :param hits: Number of results returned from query
40 changes: 40 additions & 0 deletions hydrocron/api/data_access/db.py
@@ -56,3 +56,43 @@ def get_node_series_by_feature_id(self, feature_id, start_time, end_time):  # noqa: E501
            Key(constants.SWOT_NODE_SORT_KEY).between(start_time, end_time))
        )
        return items
+
+    def get_prior_lake_series_by_feature_id(self, feature_id, start_time, end_time):  # noqa: E501 # pylint: disable=W0613
+        """Query the prior lake table for a single lake's time series.
+
+        @param feature_id: str - ID of the prior lake feature to query
+        @param start_time: str - Start of the time range to query
+        @param end_time: str - End of the time range to query
+        @return: dictionary of items
+        """
+        table_name = constants.SWOT_PRIOR_LAKE_TABLE_NAME
+
+        hydrocron_table = self._dynamo_instance.Table(table_name)
+        hydrocron_table.load()
+
+        items = hydrocron_table.query(KeyConditionExpression=(
+            Key(constants.SWOT_PRIOR_LAKE_PARTITION_KEY).eq(feature_id) &
+            Key(constants.SWOT_PRIOR_LAKE_SORT_KEY).between(start_time, end_time))
+        )
+        return items
+
+    def get_granule_ur(self, table_name, granule_ur):
+        """Query a Hydrocron table for a granule UR.
+
+        @param table_name: str - Hydrocron table to query
+        @param granule_ur: str - Granule UR
+        @return: dictionary of items
+        """
+        hydrocron_table = self._dynamo_instance.Table(table_name)
+        hydrocron_table.load()
+
+        items = hydrocron_table.query(
+            ProjectionExpression="granuleUR",
+            Limit=1,
+            IndexName="GranuleURIndex",
+            KeyConditionExpression=(
+                Key("granuleUR").eq(granule_ur)
+            )
+        )
+        return items
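
For context, the new query follows the same boto3 pattern as the existing reach and node lookups. A standalone sketch against a local DynamoDB, where the endpoint, table name, and key names are assumptions for illustration only:

```python
import boto3
from boto3.dynamodb.conditions import Key

# Assumed local endpoint and names; the real values come from hydrocron constants
dynamodb = boto3.resource("dynamodb", endpoint_url="http://localhost:8000")
table = dynamodb.Table("hydrocron-swot-prior-lake-table")  # hypothetical name

items = table.query(
    KeyConditionExpression=(
        Key("lake_id").eq("6350036102")  # assumed partition key
        & Key("range_start_time").between(  # assumed sort key
            "2024-07-20T00:00:00Z", "2024-07-26T00:00:00Z"
        )
    )
)
print(items["Items"])
```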