made suggested updates

fsspec · Jul 8, 2024 · d37bd56 · d37bd56
1 parent eca50f2
commit d37bd56
Showing 1 changed file with 12 additions and 15 deletions.
diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py
@@ -589,14 +589,13 @@ def correct_hrrr_subhf_step(group: Dict) -> Dict:
 def parse_grib_idx(
     basename: str,
     suffix: str = "idx",
-    storage_options: Dict = {},
-    tstamp: Optional["pd.Timestamp"] = None,
+    storage_options: Optional[Dict] = None,
     validate: bool = False,
 ) -> "pd.DataFrame":
     """
-    Standalone method used to extract metadata from a grib2 idx file(text) from NODD.
+    Parses per-message metadata from a grib2.idx file (text-type) to a dataframe of attributes
 
-    The function takes idx file, extracts the metadata known as attrs (variables with
+    The function uses the idx file, extracts the metadata known as attrs (variables with
     level and forecast time) from each idx entry and converts it into pandas
     DataFrame. The dataframe is later to build the one-to-one mapping to the grib file metadata.
 
@@ -608,8 +607,6 @@ def parse_grib_idx(
         The suffix is the ending for the idx file.
     storage_options: dict
         For accessing the data, passed to filesystem
-    tstamp : Optional[pd.Timestamp]
-        The timestamp to use for when the data was indexed
     validate : bool
         The validation if the metadata table has duplicate attrs.
 
@@ -619,28 +616,28 @@ def parse_grib_idx(
     """
     import pandas as pd
 
-    fs, _ = fsspec.core.url_to_fs(basename, **storage_options)
+    fs, _ = fsspec.core.url_to_fs(basename, **(storage_options or {}))
 
     fname = f"{basename}.{suffix}"
 
     baseinfo = fs.info(basename)
 
-    result = pd.read_csv(fs.open(fname), header=None, names=["raw_data"])
-    result[["idx", "offset", "date", "attrs"]] = result["raw_data"].str.split(
-        ":", expand=True, n=3
-    )
-    result["offset"] = result["offset"].astype(int)
+    with fs.open(fname) as f:
+        result = pd.read_csv(f, header=None, names=["raw_data"])
+        result[["idx", "offset", "date", "attrs"]] = result["raw_data"].str.split(
+            ":", expand=True, n=3
+        )
+        result["offset"] = result["offset"].astype(int)
 
-    # dropping the original single "raw_data" column before the formatting
-    result.drop(columns=["raw_data"], inplace=True)
+        # dropping the original single "raw_data" column after formatting
+        result.drop(columns=["raw_data"], inplace=True)
 
     result = result.assign(
         length=(
             result.offset.shift(periods=-1, fill_value=baseinfo["size"]) - result.offset
         ),
         idx_uri=fname,
         grib_uri=basename,
-        indexed_at=tstamp if tstamp else pd.Timestamp.now(),
     )
 
     if validate and not result["attrs"].is_unique: