Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/issue 85 - Add units to db records #113

Merged
merged 8 commits into from
Mar 18, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
- Issue 85 - Add variable units to API response
- Issue 105 - Only benchmarking data is being loaded even when load_benchmarking_data flag is false
- Issue 79 - Generate data for use by benchmarks
- Issue 88 - There are no CloudWatch logs for the API Gateway
- Issue 75 - Update log messaging format
- Issue 60 - Encapsulate DynamoDB under a single shared module
- Issue 60 - Improved error handling
- Issue 13 - Add SWORD version from shp.xml to DB entries
### Changed
### Deprecated
### Removed
Expand Down
52 changes: 51 additions & 1 deletion hydrocron/db/io/swot_reach_node_shp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,15 @@
import json
from datetime import datetime
from importlib import resources
import xml.etree.ElementTree as ET
import zipfile
import logging

import geopandas as gpd
import numpy as np
import pandas as pd


logging.getLogger().setLevel(logging.INFO)


Expand Down Expand Up @@ -44,8 +47,12 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
s3_resource.Bucket(bucket_name).download_file(key, lambda_temp_file)

shp_file = gpd.read_file('zip://' + lambda_temp_file)
with zipfile.ZipFile(lambda_temp_file) as archive:
shp_xml_tree = ET.fromstring(archive.read(filename[:-4] + ".shp.xml"))
else:
shp_file = gpd.read_file('zip://' + filepath)
with zipfile.ZipFile(filepath) as archive:
shp_xml_tree = ET.fromstring(archive.read(filename[:-4] + ".shp.xml"))

numeric_columns = shp_file[columns].select_dtypes(include=[np.number]).columns
if obscure_data:
Expand All @@ -59,14 +66,57 @@ def read_shapefile(filepath, obscure_data, columns, s3_resource=None):
shp_file = shp_file.astype(str)
filename_attrs = parse_from_filename(filename)

items = assemble_attributes(shp_file, filename_attrs)
xml_attrs = parse_metadata_from_shpxml(shp_xml_tree)

attributes = filename_attrs | xml_attrs
items = assemble_attributes(shp_file, attributes)

if os.path.exists(lambda_temp_file):
os.remove(lambda_temp_file)

return items


def parse_metadata_from_shpxml(xml_elem):
    """
    Extract record metadata from a granule's .shp.xml metadata file.

    Reads the SWORD version number (two digits embedded in the prior river
    database filename referenced by the global attributes) and the units
    declared on each field, and returns them as attributes to attach to the
    database record.

    Parameters
    ----------
    xml_elem : xml.etree.ElementTree.Element
        an Element representation of the shp.xml metadata file

    Returns
    -------
    metadata_attrs : dict
        a dictionary of metadata attributes to add to record;
        'sword_version' is always present (empty string when the metadata
        does not reference a prior river database file), and one
        '<field>_units' key is added per field that declares units
    """
    # Default so a granule with missing/partial global attributes still
    # yields a well-formed record instead of raising NameError.
    metadata_attrs = {'sword_version': ''}

    # SWORD version: e.g. "..._v16.nc" -> "16" via the [-5:-3] slice.
    for globs in xml_elem.findall('global_attributes'):
        prior_db_elem = globs.find('xref_prior_river_db_files')
        if prior_db_elem is not None and prior_db_elem.text:
            metadata_attrs['sword_version'] = prior_db_elem.text[-5:-3]

    # Units on fields that declare them, stored as "<field>_units".
    for child in xml_elem:
        if child.tag == 'attributes':
            for field in child:
                try:
                    units = field.find('units').text
                except AttributeError:
                    # No <units> child at all on this field.
                    units = ""
                    logging.info('No units on field %s', field.tag)

                # Truthiness check also skips an empty <units/> element,
                # whose .text is None (previously stored None in the record).
                if units:
                    metadata_attrs[field.tag + "_units"] = units

    return metadata_attrs


def assemble_attributes(file_as_str, attributes):
"""
Helper function to concat file attributes to records
Expand Down
8 changes: 7 additions & 1 deletion hydrocron/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
"time": "739741183.129",
"time_str": "2023-06-10T19:39:43Z",
"wse": "286.2983",
"cycle_id": "548"
"cycle_id": "548",
"sword_version": "15",
"p_lat_units": "degrees_north"
}

DB_TEST_TABLE_NAME = "hydrocron-swot-test-table"
Expand All @@ -32,6 +34,9 @@
TEST_REACH_ID_VALUE = '71224100223'
TEST_TIME_VALUE = '2023-06-10T19:33:37Z'
TEST_WSE_VALUE = '286.2983'
TEST_SWORD_VERSION_VALUE = '15'
TEST_UNITS_FIELD = 'p_lat_units'
TEST_UNITS = 'degrees_north'

# ------------ #
# PROD CONSTANTS #
Expand All @@ -54,6 +59,7 @@
FIELDNAME_SLOPE = 'slope'
FIELDNAME_P_LON = 'p_lon'
FIELDNAME_P_LAT = 'p_lat'
FIELDNAME_SWORD_VERSION = 'sword_version'

S3_CREDS_ENDPOINT = "https://archive.swot.podaac.earthdata.nasa.gov/s3credentials"

Expand Down
3 changes: 3 additions & 0 deletions tests/test_hydrocron_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,10 @@ def test_query(hydrocron_dynamo_table):

items = hydrocron_dynamo_table.run_query(
partition_key=constants.TEST_REACH_ID_VALUE)

assert items[0][constants.FIELDNAME_WSE] == constants.TEST_WSE_VALUE
assert items[0][constants.FIELDNAME_SWORD_VERSION] == constants.TEST_SWORD_VERSION_VALUE
assert items[0][constants.TEST_UNITS_FIELD] == constants.TEST_UNITS


def test_delete_item(hydrocron_dynamo_table):
Expand Down
Loading