fix(SFRData.add_to_segment_data):

* Add a one_inflow_per_path argument (default False), similar to the one in the add_to_perioddata function for MODFLOW 6. Previously, variables such as inflows were limited to a single value per flowline routing path (in the interest of avoiding duplicates), but many applications may have multiple inflows along a path.
* Sum any multiple values so that there is only one value per SFR segment (required for the SFR2 Package).
DOI-USGS · Sep 28, 2023 · 2aae76e · 2aae76e
1 parent e090954
commit 2aae76e
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 18 deletions.
diff --git a/examples/meras/meras_sfrmaker_config.yml b/examples/meras/meras_sfrmaker_config.yml
@@ -20,7 +20,7 @@ flowlines:
   name_column: GNIS_NAME
   attr_length_units: feet  # units of source data
   attr_height_units: feet  # units of source data
-inflows:  # see sfrmaker.data.add_to_perioddata for arguments
+inflows:  # see sfrmaker.flows.add_to_perioddata for arguments
   filename: inflows.csv
   line_id_column: line_id
   period_column: per  # column with model stress periods

diff --git a/examples/tylerforks/inflows.csv b/examples/tylerforks/inflows.csv
@@ -1,3 +1,5 @@
 end_datetime,inflow_m3d,SP,line_id_COMID
 10/31/1990,15000.0000,0,1815035
-11/30/1990,16000.0000,1,1815035
+11/30/1990,16000.0000,1,1815035
+10/31/1990,1500.0000,0,1814949
+11/30/1990,1600.0000,1,1814949
diff --git a/examples/tylerforks/tf_sfrmaker_config2.yml b/examples/tylerforks/tf_sfrmaker_config2.yml
@@ -10,7 +10,7 @@ flowlines:
   PlusFlowlineVAA: NHDPlus/NHDPlusAttributes/PlusFlowlineVAA.dbf
   PlusFlow: NHDPlus/NHDPlusAttributes/PlusFlow.dbf
   elevslope: NHDPlus/NHDPlusAttributes/elevslope.dbf
-inflows:  # see sfrmaker.data.add_to_perioddata for arguments
+inflows: # see sfrmaker.flows.add_to_segment_data for arguments
   filename: inflows.csv
   line_id_column: line_id_COMID
   period_column: SP  # column with model stress periods

diff --git a/sfrmaker/flows.py b/sfrmaker/flows.py
@@ -216,12 +216,12 @@ def add_to_perioddata(sfrdata, data, flowline_routing=None,
                       one_inflow_per_path=False,
                       distribute_flows_to_reaches=False):
     """Add data to the period data table (sfrdata.period_data)
-    for a MODFLOW-6 style sfrpackage.
+    for a MODFLOW-6 style SFR package.
 
     Parameters
     ----------
     sfrdata : sfrmaker.SFRData instance
-        SFRData instance with reach_data table attribute. To add observations from x, y coordinates,
+        SFRData instance with reach_data table attribute. To add data from x, y coordinates,
         the reach_data table must have a geometry column with LineStrings representing each reach, or
         an sfrlines_shapefile is required. Reach numbers are assumed to be in an 'rno' column.
     data : DataFrame, path to csv file, or list of DataFrames or file paths
@@ -468,8 +468,49 @@ def add_to_segment_data(sfrdata, data, flowline_routing=None,
                         line_id_column=None,
                         segment_column=None,
                         period_column='per',
-                        data_column='Q_avg'):
-    """Like add_to_perioddata, but for MODFLOW-2005.
+                        data_column='Q_avg',
+                        one_inflow_per_path=False):
+    """Add data to the segment data table (sfrdata.segment_data)
+    for a MODFLOW-2005 style SFR package.
+
+    Parameters
+    ----------
+    sfrdata : sfrmaker.SFRData instance
+        SFRData instance with reach_data table attribute. To add data from x, y coordinates,
+        the reach_data table must have a geometry column with LineStrings representing each reach, or
+        an sfrlines_shapefile is required. Reach numbers are assumed to be in an 'rno' column.
+    data : DataFrame, path to csv file, or list of DataFrames or file paths
+        Table with information on the inflow or other data sites to be located. Must have
+        either reach numbers (rno_column), line_ids (line_id_column),
+        or x and y locations (x_column_in_data and y_column_in_data).
+    flowline_routing : dict
+        Optional dictionary of routing for source hydrography. Only needed
+        if locating by line_id, and SFR network is a subset of the full source
+        hydrography (i.e. some lines were dropped in the creation of the SFR package,
+        or if the sites are inflow points corresponding to lines outside of the model perimeter).
+        In this case, points referenced to line_ids that are missing from the SFR
+        network are placed at the first reach corresponding to the next downstream line_id
+        that is represented in the SFR network. By default, None.
+    variable : str, optional
+        Modflow-2005 SFR Package variable (see the SFR2 Package documentation), by default 'flow'
+    line_id_column : str
+        Column in data matching observation sites to line_ids in the source hydrography data.
+        Either line_id_column or rno_column must be specified. By default, None
+    segment_column : str
+        Column in data matching sites to segment numbers in the SFR network. By default, None.
+    period_column : str, optional
+        Column with MODFLOW stress period for each inflow value, by default 'per'.
+    data_column : str, optional
+        Column with flow values, by default 'Q_avg'
+    one_inflow_per_path : bool, optional
+        Limit inflows to one per (headwater to outlet) routing path, choosing the inflow location 
+        that is furthest downstream. By default, False.
+    distribute_flows_to_reaches : bool, optional
+        Not implemented yet for MODFLOW-2005.
+
+    Returns
+    -------
+    Updates the sfrdata.segment_data DataFrame.
     """
     sfrd = sfrdata
 
@@ -492,7 +533,7 @@ def add_to_segment_data(sfrdata, data, flowline_routing=None,
             "Data to add need segment number or flowline routing information is needed."
 
     # check for duplicate inflows in same path
-    if variable == 'flow':
+    if variable == 'flow' and one_inflow_per_path:
         line_ids = set(data[line_id_column])
         drop = set()
         dropped_line_info_file = 'dropped_inflows_locations.csv'
@@ -521,14 +562,17 @@ def add_to_segment_data(sfrdata, data, flowline_routing=None,
     sfrd.segment_data.index = pd.MultiIndex.from_tuples(zip(sfrd.segment_data.per, sfrd.segment_data.nseg),
                                                         names=['per', 'nseg'])
     loc = list(zip(data.per, data.nseg))
-    data.index = pd.MultiIndex.from_tuples(loc, names=['per', 'nseg'])
-    replace = sorted(list(set(data.index).intersection(sfrd.segment_data.index)))
-    add = sorted(list(set(data.index).difference(sfrd.segment_data.index)))
-    sfrd.segment_data.loc[replace, variable] = data.loc[replace, variable]
+    # limit data to one row per segment, per period
+    # (sum multiple values)
+    per_seg_sums = data.groupby(['per', 'nseg']).last()
+    per_seg_sums[variable] = data.groupby(['per', 'nseg']).sum()[variable]
+    replace = sorted(list(set(per_seg_sums.index).intersection(sfrd.segment_data.index)))
+    add = sorted(list(set(per_seg_sums.index).difference(sfrd.segment_data.index)))
+    sfrd.segment_data.loc[replace, variable] = per_seg_sums.loc[replace, variable]
 
     # concat on the added data (create additional rows in segment_data table)
     to_concat = [sfrd.segment_data]
-    period_groups = data.loc[add, ['per', 'nseg', variable]].reset_index(drop=True).groupby('per')
+    period_groups = per_seg_sums.loc[add, [variable]].reset_index(drop=False).groupby('per')
     for per, group in period_groups:
         # start with existing data (row) for that segment
         df = sfrd.segment_data.loc[(slice(None, None), group.nseg), :].copy()

diff --git a/sfrmaker/sfrdata.py b/sfrmaker/sfrdata.py
@@ -415,13 +415,16 @@ def add_to_segment_data(self, data, flowline_routing,
                             line_id_column=None,
                             segment_column='segment',
                             period_column='per',
-                            data_column='Q_avg'):
+                            data_column='Q_avg',
+                            one_inflow_per_path=False
+                            ):
         return add_to_segment_data(self, data, flowline_routing,
                                    variable=variable,
                                    line_id_column=line_id_column,
                                    segment_column=segment_column,
                                    period_column=period_column,
-                                   data_column=data_column)
+                                   data_column=data_column,
+                                   one_inflow_per_path=one_inflow_per_path)
     @property
     def paths(self):
         """Dict listing routing sequence for each segment

diff --git a/sfrmaker/test/test_flows.py b/sfrmaker/test/test_flows.py
@@ -182,10 +182,13 @@ def test_add_to_segment_data(shellmound_sfrdata):
                         data_column='Q_avg')
     sd2 = sfrd.segment_data.copy()
     sd2.index = pd.MultiIndex.from_tuples(zip(sd2.per, sd2.nseg), names=['per', 'nseg'])
-    flows = flows.loc[~flows.line_id.isin([2])]
+    #flows = flows.loc[~flows.line_id.isin([2])]
+    #flows = flows.groupby(level=(0, 1)).sum()
     flows['nseg'] = [segment.get(l, segment[seq[-1]]) for l in flows.line_id]
-    flows.index = pd.MultiIndex.from_tuples(zip(flows.per, flows.nseg), names=['per', 'nseg'])
-    assert np.allclose(sd2.loc[flows.index, 'flow'], flows.Q_avg)
+    flow_sums = flows.groupby(['per', 'nseg']).first()
+    flow_sums['Q_avg'] = flows.groupby(['per', 'nseg'])['Q_avg'].sum()
+    #flows.index = pd.MultiIndex.from_tuples(zip(flows.per, flows.nseg), names=['per', 'nseg'])
+    assert np.allclose(sd2.loc[flow_sums.index, 'flow'], flow_sums.Q_avg)
     assert not sd2.isna().any().any()
     pd.testing.assert_frame_equal(sd1.drop('flow', axis=1),
                                   sd2.loc[sd1.index].drop('flow', axis=1),