From 4794d529685e138941ad26cf849249049701b326 Mon Sep 17 00:00:00 2001
From: jackieff <jackieff@umich.edu>
Date: Fri, 4 Dec 2020 20:36:51 -0500
Subject: [PATCH 1/5] Speeding up lookup of inp sections and bracketed words

---
 swmmio/utils/text.py | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/swmmio/utils/text.py b/swmmio/utils/text.py
index a04913b..3442f81 100644
--- a/swmmio/utils/text.py
+++ b/swmmio/utils/text.py
@@ -216,23 +216,22 @@ def get_inp_sections_details(inp_path, include_brackets=False):
     found_sects = OrderedDict()
 
     with open(inp_path) as f:
-        for line in f:
-            sect_not_found = True
-            for sect_id, data in INP_OBJECTS.items():
-                # find the start of an INP section
-                search_tag = format_inp_section_header(sect_id)
-                if search_tag.lower() in line.lower():
-                    if include_brackets:
-                        sect_id = '[{}]'.format(sect_id.upper())
-                    found_sects[sect_id.upper()] = data
-                    sect_not_found = False
-                    break
-            if sect_not_found:
-                if '[' and ']' in line:
-                    h = line.strip()
-                    if not include_brackets:
-                        h = h.replace('[', '').replace(']', '')
-                    found_sects[h] = OrderedDict(columns=['blob'])
+        txt = f.read()
+        section_dict = {key:txt.find("[{}]".format(key)) for key in INP_OBJECTS.keys() if txt.find("[{}]".format(key)) >= 0}
+        section_dict = sorted(section_dict, key=section_dict.get)
+        bracketed_words = re.findall(r"\[([A-Za-z0-9_]+)\]",txt)
+
+        for sect in bracketed_words:
+            if sect not in section_dict:
+                if not include_brackets:
+                    h = sect.replace('[', '').replace(']', '')
+                found_sects[h] = OrderedDict(columns=['blob'])
+            else:
+                if include_brackets:
+                    sect_id = '[{}]'.format(sect.upper())
+                else:
+                    sect_id = sect.upper()
+                found_sects[sect_id] = INP_OBJECTS[sect]
 
     # make necessary adjustments to columns that change based on options
     ops_cols = INP_OBJECTS['OPTIONS']['columns']

From d556e1fdb2e2cb20a30ac9f814acf8206ff1fd88 Mon Sep 17 00:00:00 2001
From: jackieff <jackieff@umich.edu>
Date: Wed, 5 May 2021 17:12:44 -0400
Subject: [PATCH 2/5] Eliminating need to scan inp file twice for
 dataframe_from_inp

---
 swmmio/utils/dataframes.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/swmmio/utils/dataframes.py b/swmmio/utils/dataframes.py
index 612f2f9..948b735 100644
--- a/swmmio/utils/dataframes.py
+++ b/swmmio/utils/dataframes.py
@@ -116,22 +116,18 @@ def dataframe_from_inp(inp_path, section, additional_cols=None, quote_replace='
     :param quote_replace:
     :return:
     """
-
+    from swmmio.defs import INP_OBJECTS
     # format the section header for look up in headers OrderedDict
     sect = remove_braces(section).upper()
 
-    # get list of all section headers in inp to use as section ending flags
-    headers = get_inp_sections_details(inp_path, include_brackets=False)
-
-    if sect not in headers:
-        warnings.warn(f'{sect} section not found in {inp_path}')
-        return pd.DataFrame()
-
     # extract the string and read into a dataframe
     start_string = format_inp_section_header(section)
-    end_strings = [format_inp_section_header(h) for h in headers.keys()]
+    end_strings = [format_inp_section_header(h) for h in INP_OBJECTS.keys()]
     s = extract_section_of_file(inp_path, start_string, end_strings, **kwargs)
 
+    if len(s.replace(start_string, "").replace("\n","")) == 0:
+        warnings.warn(f'{sect} section not found in {inp_path}')
+        return pd.DataFrame()
     # replace occurrences of double quotes ""
     s = s.replace('""', quote_replace)
 

From 339a29daf7bb3f86f495a85a9820533e6145b528 Mon Sep 17 00:00:00 2001
From: jackieff <jackieff@umich.edu>
Date: Wed, 5 May 2021 17:21:33 -0400
Subject: [PATCH 3/5] Fixing error from removed headers variable

---
 swmmio/utils/dataframes.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/swmmio/utils/dataframes.py b/swmmio/utils/dataframes.py
index 948b735..ae5c512 100644
--- a/swmmio/utils/dataframes.py
+++ b/swmmio/utils/dataframes.py
@@ -134,9 +134,9 @@ def dataframe_from_inp(inp_path, section, additional_cols=None, quote_replace='
     # and get the list of columns to use for parsing this section
     # add any additional columns needed for special cases (build instructions)
     additional_cols = [] if additional_cols is None else additional_cols
-    cols = headers[sect]['columns'] + additional_cols
+    cols = INP_OBJECTS[sect]['columns'] + additional_cols
 
-    if headers[sect]['columns'][0] == 'blob':
+    if INP_OBJECTS[sect]['columns'][0] == 'blob':
         # return the whole row, without specific col headers
         return pd.read_csv(StringIO(s), delim_whitespace=False)
     else:

From 1292b22805e09a0e81f49ce381e47b150c5c828a Mon Sep 17 00:00:00 2001
From: Adam Erispaha <aerispaha@gmail.com>
Date: Wed, 5 May 2021 16:25:10 -0500
Subject: [PATCH 4/5] minor change to code style

---
 swmmio/utils/text.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/swmmio/utils/text.py b/swmmio/utils/text.py
index 3442f81..22cd16e 100644
--- a/swmmio/utils/text.py
+++ b/swmmio/utils/text.py
@@ -217,9 +217,12 @@ def get_inp_sections_details(inp_path, include_brackets=False):
 
     with open(inp_path) as f:
         txt = f.read()
-        section_dict = {key:txt.find("[{}]".format(key)) for key in INP_OBJECTS.keys() if txt.find("[{}]".format(key)) >= 0}
+        section_dict = {
+            key: txt.find("[{}]".format(key)) for key in INP_OBJECTS.keys() 
+            if txt.find("[{}]".format(key)) >= 0
+        }
         section_dict = sorted(section_dict, key=section_dict.get)
-        bracketed_words = re.findall(r"\[([A-Za-z0-9_]+)\]",txt)
+        bracketed_words = re.findall(r"\[([A-Za-z0-9_]+)\]", txt)
 
         for sect in bracketed_words:
             if sect not in section_dict:

From 3331597ecc1cf916adc990479261b0de2bd8c63d Mon Sep 17 00:00:00 2001
From: jackieff <jackieff@umich.edu>
Date: Wed, 5 May 2021 17:49:03 -0400
Subject: [PATCH 5/5] Reverting dataframes.py to 31766d6

---
 swmmio/utils/dataframes.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/swmmio/utils/dataframes.py b/swmmio/utils/dataframes.py
index ae5c512..612f2f9 100644
--- a/swmmio/utils/dataframes.py
+++ b/swmmio/utils/dataframes.py
@@ -116,27 +116,31 @@ def dataframe_from_inp(inp_path, section, additional_cols=None, quote_replace='
     :param quote_replace:
     :return:
     """
-    from swmmio.defs import INP_OBJECTS
+
     # format the section header for look up in headers OrderedDict
     sect = remove_braces(section).upper()
 
+    # get list of all section headers in inp to use as section ending flags
+    headers = get_inp_sections_details(inp_path, include_brackets=False)
+
+    if sect not in headers:
+        warnings.warn(f'{sect} section not found in {inp_path}')
+        return pd.DataFrame()
+
     # extract the string and read into a dataframe
     start_string = format_inp_section_header(section)
-    end_strings = [format_inp_section_header(h) for h in INP_OBJECTS.keys()]
+    end_strings = [format_inp_section_header(h) for h in headers.keys()]
     s = extract_section_of_file(inp_path, start_string, end_strings, **kwargs)
 
-    if len(s.replace(start_string, "").replace("\n","")) == 0:
-        warnings.warn(f'{sect} section not found in {inp_path}')
-        return pd.DataFrame()
     # replace occurrences of double quotes ""
     s = s.replace('""', quote_replace)
 
     # and get the list of columns to use for parsing this section
     # add any additional columns needed for special cases (build instructions)
     additional_cols = [] if additional_cols is None else additional_cols
-    cols = INP_OBJECTS[sect]['columns'] + additional_cols
+    cols = headers[sect]['columns'] + additional_cols
 
-    if INP_OBJECTS[sect]['columns'][0] == 'blob':
+    if headers[sect]['columns'][0] == 'blob':
         # return the whole row, without specific col headers
         return pd.read_csv(StringIO(s), delim_whitespace=False)
     else: