From 2ccb32c0e751c56b734fcba6f513e13b30626b03 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 10:05:55 -0400
Subject: [PATCH 01/35] Eliminate redundancy in create_object_html usage

---
 looper/html_reports.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/looper/html_reports.py b/looper/html_reports.py
index 5cc6e92e5..dd64a6ea9 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -576,7 +576,7 @@ class HTMLReportBuilder():
 
     def __init__(self, prj):
         """
-        The Project defines the instance; establish an iteration counter.
+        The Project defines the instance.
 
         :param Project prj: Project with which to work/operate on
         """
@@ -632,23 +632,27 @@ def create_sample_parent_html(objs):
                 html_file.write(HTML_FOOTER)
                 html_file.close()
 
-        def create_object_html(objs, nb, type, filename, index_html):
+        def create_object_html(single_object, all_objects):
             # Generates a page for an individual object type with all of its
             # plots from each sample
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
-            object_path = os.path.join(reports_dir,
-                                       filename.replace(' ', '_').lower())
+            # Generate object type and filename
+            type = single_object['key'].drop_duplicates()
+            filename = str(type) + ".html"
+            object_path = os.path.join(
+                            reports_dir, filename.replace(' ', '_').lower())
             if not os.path.exists(os.path.dirname(object_path)):
                 os.makedirs(os.path.dirname(object_path))
             with open(object_path, 'w') as html_file:
                 html_file.write(HTML_HEAD_OPEN)
-                html_file.write(create_navbar(nb, reports_dir))
+                html_file.write(create_navbar(all_objects, reports_dir))
                 html_file.write(HTML_HEAD_CLOSE)
+                
                 html_file.write("\t\t<h4>{} objects</h4>\n".format(str(type)))
                 links = []
                 figures = []
                 warnings = []
-                for i, row in objs.iterrows():
+                for i, row in single_object.iterrows():
                     page_path = os.path.join(
                                  self.prj.metadata.results_subdir,
                                  row['sample_name'], row['filename'])
@@ -690,7 +694,8 @@ def create_object_html(objs, nb, type, filename, index_html):
                 html_file.close()
 
             if warnings:
-                _LOGGER.warn("Warning: " + filename.replace(' ', '_').lower() +
+                _LOGGER.warn("create_object_html: " +
+                             filename.replace(' ', '_').lower() +
                              " references nonexistent object files")
                 _LOGGER.debug(filename.replace(' ', '_').lower() +
                               " nonexistent files: " +
@@ -1230,10 +1235,8 @@ def create_index_html(objs, stats):
 
             # Create objects pages
             for key in objs['key'].drop_duplicates().sort_values():
-                objects = objs[objs['key'] == key]
-                object_filename = str(key) + ".html"
-                create_object_html(
-                    objects, objs, key, object_filename, objs_html_path)
+                single_object = objs[objs['key'] == key]
+                create_object_html(single_object, objs)
 
             # Create parent objects page with links to each object type
             create_object_parent_html(objs)

From 33d7d2c472172438bebf6a82c74a40c0070a76b5 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 10:11:29 -0400
Subject: [PATCH 02/35] Improve variable naming; eliminate unused var in
 create_sample_html

---
 looper/html_reports.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index dd64a6ea9..7f801b1ec 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -585,7 +585,7 @@ def __init__(self, prj):
         
     def __call__(self, objs, stats):
 
-        def create_object_parent_html(objs):
+        def create_object_parent_html(all_objects):
             # Generates a page listing all the project objects with links
             # to individual object pages
             reports_dir = os.path.join(self.prj.metadata.output_dir,
@@ -595,11 +595,11 @@ def create_object_parent_html(objs):
                 os.makedirs(os.path.dirname(object_parent_path))
             with open(object_parent_path, 'w') as html_file:
                 html_file.write(HTML_HEAD_OPEN)
-                html_file.write(create_navbar(objs, reports_dir))
+                html_file.write(create_navbar(all_objects, reports_dir))
                 html_file.write(HTML_HEAD_CLOSE)
                 html_file.write(GENERIC_HEADER.format(header="Objects"))
                 html_file.write(GENERIC_LIST_HEADER)
-                for key in objs['key'].drop_duplicates().sort_values():
+                for key in all_objects['key'].drop_duplicates().sort_values():
                     page_name = key + ".html"
                     page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
                     page_relpath = os.path.relpath(page_path, reports_dir)
@@ -608,7 +608,7 @@ def create_object_parent_html(objs):
                 html_file.write(HTML_FOOTER)
                 html_file.close()
 
-        def create_sample_parent_html(objs):
+        def create_sample_parent_html(all_samples):
             # Generates a page listing all the project samples with links
             # to individual sample pages
             reports_dir = os.path.join(self.prj.metadata.output_dir,
@@ -618,7 +618,7 @@ def create_sample_parent_html(objs):
                 os.makedirs(os.path.dirname(sample_parent_path))
             with open(sample_parent_path, 'w') as html_file:
                 html_file.write(HTML_HEAD_OPEN)
-                html_file.write(create_navbar(objs, reports_dir))
+                html_file.write(create_navbar(all_samples, reports_dir))
                 html_file.write(HTML_HEAD_CLOSE)
                 html_file.write(GENERIC_HEADER.format(header="Samples"))
                 html_file.write(GENERIC_LIST_HEADER)
@@ -820,8 +820,7 @@ def create_status_html(all_samples):
                 if warning:
                     _LOGGER.warn("The stats_summary.tsv file is incomplete")
 
-        def create_sample_html(all_samples, sample_name, sample_stats,
-                               index_html):
+        def create_sample_html(all_samples, sample_name, sample_stats):
             # Produce an HTML page containing all of a sample's objects
             # and the sample summary statistics
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
@@ -1213,9 +1212,9 @@ def create_index_html(objs, stats):
                     for value in table_row:                 
                         if value == sample_name:
                             # Generate individual sample page and return link
-                            sample_page = create_sample_html(
-                                            objs, sample_name,
-                                            stats[sample_pos], objs_html_path)
+                            sample_page = create_sample_html(objs,
+                                                             sample_name,
+                                                             stats[sample_pos])
                             # Treat sample_name as a link to sample page
                             objs_html_file.write(TABLE_ROWS_LINK.format(
                                 html_page=sample_page,

From b8adf48e5327c666213b53405c955ee5f6399c26 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 10:48:32 -0400
Subject: [PATCH 03/35] Improve commenting and readability

---
 looper/html_reports.py | 367 +++++++++++++++++++++++------------------
 1 file changed, 204 insertions(+), 163 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 7f801b1ec..7abf5333d 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -8,7 +8,7 @@
 from peppy.utils import alpha_cased
 
 _LOGGER = logging.getLogger('HTMLReportBuilder')
-    
+
 __author__ = "Jason Smith"
 __email__ = "jasonsmith@virginia.edu"
 
@@ -75,7 +75,6 @@
     </body>
 </html>
 """
-
 HTML_VARS = ["HTML_HEAD_OPEN", "HTML_TITLE", "HTML_HEAD_CLOSE",
              "HTML_BUTTON", "HTML_FIGURE", "HTML_FOOTER"]
 
@@ -177,7 +176,6 @@
         <li class="navbar"><a href='{samples_html}'>Samples</a></li>
     </ul>
 """
-
 NAVBAR_VARS = ["HTML_NAVBAR_STYLE_BASIC", "HTML_NAVBAR_BASIC", "NAVBAR_HEADER",
                "NAVBAR_LOGO", "NAVBAR_DROPDOWN_HEADER", "NAVBAR_DROPDOWN_LINK",
                "NAVBAR_DROPDOWN_DIVIDER", "NAVBAR_DROPDOWN_FOOTER",
@@ -192,17 +190,14 @@
 """\
       <ul style="list-style-type:circle">
 """
-
 GENERIC_LIST_ENTRY = \
 """\
         <li><a href='{page}'>{label}</a></li>
 """
-
 GENERIC_LIST_FOOTER = \
 """
       </ul> 
 """
-
 GENERIC_VARS = ["HTML_HEAD_OPEN", "HTML_TITLE", "HTML_HEAD_CLOSE",
                 "HTML_FOOTER", "GENERIC_HEADER", "GENERIC_LIST_HEADER",
                 "GENERIC_LIST_ENTRY", "GENERIC_LIST_FOOTER"]
@@ -315,12 +310,10 @@
           <thead>
             <tr class="stats-firstrow">
 """
-
 TABLE_COLS = \
 """\
               <th class="rotate-45"><div><span>{col_val}</span></div></th>
 """
-
 TABLE_COLS_FOOTER = \
 """\
             </tr>
@@ -331,7 +324,6 @@
 """\
             <tr>    
 """
-
 TABLE_ROWS = \
 """\
               <td class="text"><span>{row_val}</span></td>
@@ -340,19 +332,16 @@
 """\
             </tr>
 """
-
 TABLE_FOOTER = \
 """\
           </tbody>
         </table>
       </div>
 """
-
 TABLE_ROWS_LINK = \
 """\
               <td style="cursor:pointer" onclick="location.href='{html_page}'"><a class="LN1 LN2 LN3 LN4 LN5" href="{page_name}" target="_top">{link_name}</a></td>
 """
-
 LINKS_STYLE_BASIC = \
 """
 a.LN1 {
@@ -458,7 +447,6 @@
             vertical-align: middle;
         }
 """
-
 SAMPLE_PLOTS = \
 """\
             <figure class="figure">
@@ -466,7 +454,6 @@
                 <a href='{path}'><figcaption class="figure-caption text-left">'{label}'</figcaption></a>
             </figure>
 """
-
 SAMPLE_FOOTER = \
 """
         <p><a href='{index_html_path}'>Return to summary page</a></p>
@@ -526,11 +513,10 @@
         </div>
         <hr>
 """
-
 STATUS_VARS = ["STATUS_HEADER", "STATUS_TABLE_HEAD", "STATUS_BUTTON",
                "STATUS_ROW_HEADER", "STATUS_ROW_VALUE", "STATUS_ROW_LINK",
                "STATUS_ROW_FOOTER", "STATUS_FOOTER"]
-          
+
 # Objects-page-related
 OBJECTS_HEADER = \
 """\
@@ -570,7 +556,7 @@
 
 __all__ = HTML_VARS + NAVBAR_VARS + GENERIC_VARS + \
           TABLE_VARS + SAMPLE_VARS + STATUS_VARS + OBJECTS_VARS
-          
+
 class HTMLReportBuilder():
     """ Generate HTML summary report for project/samples """
 
@@ -582,12 +568,19 @@ def __init__(self, prj):
         """
         super(HTMLReportBuilder, self).__init__()
         self.prj = prj
-        
+
     def __call__(self, objs, stats):
+        """ Do the work of the subcommand/program. """
 
         def create_object_parent_html(all_objects):
-            # Generates a page listing all the project objects with links
-            # to individual object pages
+            """
+            Generates a page listing all the project objects with links
+            to individual object pages
+            
+            :param panda.DataFrame all_objects: project level dataframe 
+                containing any reported objects for all samples
+            """
+
             reports_dir = os.path.join(self.prj.metadata.output_dir,
                                        "reports")
             object_parent_path = os.path.join(reports_dir, "objects.html")
@@ -609,8 +602,14 @@ def create_object_parent_html(all_objects):
                 html_file.close()
 
         def create_sample_parent_html(all_samples):
-            # Generates a page listing all the project samples with links
-            # to individual sample pages
+            """
+            Generates a page listing all the project samples with links
+            to individual sample pages
+            
+            :param panda.DataFrame all_samples: project level dataframe 
+                containing any reported objects for all samples 
+            """
+
             reports_dir = os.path.join(self.prj.metadata.output_dir,
                                        "reports")
             sample_parent_path = os.path.join(reports_dir, "samples.html")
@@ -633,8 +632,16 @@ def create_sample_parent_html(all_samples):
                 html_file.close()
 
         def create_object_html(single_object, all_objects):
-            # Generates a page for an individual object type with all of its
-            # plots from each sample
+            """
+            Generates a page for an individual object type with all of its
+            plots from each sample
+            
+            :param panda.DataFrame single_object: contains reference 
+                information for an individual object type for all samples
+            :param panda.DataFrame all_objects: project level dataframe 
+                containing any reported objects for all samples
+            """
+
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
             # Generate object type and filename
             type = single_object['key'].drop_duplicates()
@@ -701,128 +708,17 @@ def create_object_html(single_object, all_objects):
                               " nonexistent files: " +
                               ','.join(str(file) for file in warnings))
 
-        def create_status_html(all_samples):
-            # Generates a page listing all the samples, their run status, their
-            # log file, and the total runtime if completed.
-            reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
-            status_html_path = os.path.join(reports_dir, "status.html")
-            if not os.path.exists(os.path.dirname(status_html_path)):
-                os.makedirs(os.path.dirname(status_html_path))
-            with open(status_html_path, 'w') as html_file:
-                html_file.write(HTML_HEAD_OPEN)
-                html_file.write(create_navbar(all_samples, reports_dir))
-                html_file.write(HTML_HEAD_CLOSE)
-                html_file.write(STATUS_HEADER)
-                html_file.write(STATUS_TABLE_HEAD)
-                warning = False
-                for sample in self.prj.samples:
-                    sample_name = str(sample.sample_name)
-                    # Grab the status flag for the current sample
-                    flag = glob.glob(os.path.join(
-                                        self.prj.metadata.results_subdir,
-                                        sample_name, '*.flag'))               
-                    if not flag:
-                        button_class = "table-danger"
-                        flag = "Missing"
-                        _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name))
-                    elif len(flag) > 1:
-                        button_class = "table-warning"
-                        flag = "Multiple"
-                        _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name))
-                    else:
-                        if "completed" in str(flag):
-                            button_class = "table-success"
-                            flag = "Completed"
-                        elif "running" in str(flag):
-                            button_class = "table-warning"
-                            flag = "Running"
-                        elif "failed" in str(flag):
-                            button_class = "table-danger"
-                            flag = "Failed"
-                        else:
-                            button_class = "table-secondary"
-                            flag = "Unknown"
-
-                    # Create table entry for each sample
-                    html_file.write(STATUS_ROW_HEADER)
-                    # First Col: Sample_Name (w/ link to sample page)
-                    page_name = sample_name + ".html"
-                    page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
-                    page_relpath = os.path.relpath(page_path, reports_dir)
-                    html_file.write(STATUS_ROW_LINK.format(
-                                        row_class="",
-                                        file_link=page_relpath,
-                                        link_name=sample_name))
-                    # Second Col: Status (color-coded)
-                    html_file.write(STATUS_ROW_VALUE.format(
-                                        row_class=button_class,
-                                        value=flag))
-                    # Third Col: Log File (w/ link to file)
-                    single_sample = all_samples[all_samples['sample_name'] == sample_name]
-                    if single_sample.empty:
-                        # When there is no objects.tsv file, search for the
-                        # presence of log, profile, and command files
-                        log_name = os.path.basename(str(glob.glob(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, '*log.md'))[0]))
-                        # Currently unused. Future?
-                        # profile_name = os.path.basename(str(glob.glob(os.path.join(
-                                            # self.prj.metadata.results_subdir,
-                                            # sample_name, '*profile.tsv'))[0]))
-                        # command_name = os.path.basename(str(glob.glob(os.path.join(
-                                            # self.prj.metadata.results_subdir,
-                                            # sample_name, '*commands.sh'))[0]))
-                    else:
-                        log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
-                        # Currently unused. Future?
-                        # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
-                        # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
-                    log_file = os.path.join(self.prj.metadata.results_subdir,
-                                            sample_name, log_name)
-                    log_relpath = os.path.relpath(log_file, reports_dir)
-                    if os.path.isfile(log_file):
-                        html_file.write(STATUS_ROW_LINK.format(
-                                            row_class="",
-                                            file_link=log_relpath,
-                                            link_name=log_name))
-                    else:
-                        # Leave cell empty
-                        html_file.write(STATUS_ROW_LINK.format(
-                                            row_class="",
-                                            file_link="",
-                                            link_name=""))
-                    # Fourth Col: Sample runtime (if completed)
-                    # If Completed, use stats.tsv
-                    stats_file = os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, "stats.tsv")
-                    if os.path.isfile(stats_file):
-                        t = _pd.read_table(stats_file, header=None,
-                                           names=['key', 'value', 'pl'])
-                        t.drop_duplicates(subset=['key', 'pl'],
-                                          keep='last', inplace=True)
-                        try:
-                            time = str(t[t['key'] == 'Time'].iloc[0]['value'])
-                            html_file.write(STATUS_ROW_VALUE.format(
-                                            row_class="",
-                                            value=str(time)))
-                        except IndexError:
-                            warning = True                       
-                    else:
-                        html_file.write(STATUS_ROW_VALUE.format(
-                                            row_class=button_class,
-                                            value="Unknown"))
-                    html_file.write(STATUS_ROW_FOOTER)
-
-                html_file.write(STATUS_FOOTER)
-                html_file.write(HTML_FOOTER)
-                html_file.close()
-                if warning:
-                    _LOGGER.warn("The stats_summary.tsv file is incomplete")
-
         def create_sample_html(all_samples, sample_name, sample_stats):
-            # Produce an HTML page containing all of a sample's objects
-            # and the sample summary statistics
+            """
+            Produce an HTML page containing all of a sample's objects
+            and the sample summary statistics
+            
+            :param panda.DataFrame all_samples: project level dataframe 
+                containing any reported objects for all samples
+            :param str sample_name: the name of the current sample
+            :param list stats: pipeline run statistics for the current sample
+            """
+
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
             html_filename = sample_name + ".html"
             html_page = os.path.join(
@@ -983,17 +879,154 @@ def create_sample_html(all_samples, sample_name, sample_stats):
             # Return the path to the newly created sample page
             return sample_page_relpath
 
+        def create_status_html(all_samples):
+            """
+            Generates a page listing all the samples, their run status, their
+            log file, and the total runtime if completed.
+            
+            :param panda.DataFrame all_samples: project level dataframe 
+                containing any reported objects for all samples
+            """
+
+            reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
+            status_html_path = os.path.join(reports_dir, "status.html")
+            if not os.path.exists(os.path.dirname(status_html_path)):
+                os.makedirs(os.path.dirname(status_html_path))
+            with open(status_html_path, 'w') as html_file:
+                html_file.write(HTML_HEAD_OPEN)
+                html_file.write(create_navbar(all_samples, reports_dir))
+                html_file.write(HTML_HEAD_CLOSE)
+                html_file.write(STATUS_HEADER)
+                html_file.write(STATUS_TABLE_HEAD)
+                warning = False
+                for sample in self.prj.samples:
+                    sample_name = str(sample.sample_name)
+                    # Grab the status flag for the current sample
+                    flag = glob.glob(os.path.join(
+                                        self.prj.metadata.results_subdir,
+                                        sample_name, '*.flag'))               
+                    if not flag:
+                        button_class = "table-danger"
+                        flag = "Missing"
+                        _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name))
+                    elif len(flag) > 1:
+                        button_class = "table-warning"
+                        flag = "Multiple"
+                        _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name))
+                    else:
+                        if "completed" in str(flag):
+                            button_class = "table-success"
+                            flag = "Completed"
+                        elif "running" in str(flag):
+                            button_class = "table-warning"
+                            flag = "Running"
+                        elif "failed" in str(flag):
+                            button_class = "table-danger"
+                            flag = "Failed"
+                        else:
+                            button_class = "table-secondary"
+                            flag = "Unknown"
+
+                    # Create table entry for each sample
+                    html_file.write(STATUS_ROW_HEADER)
+                    # First Col: Sample_Name (w/ link to sample page)
+                    page_name = sample_name + ".html"
+                    page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
+                    page_relpath = os.path.relpath(page_path, reports_dir)
+                    html_file.write(STATUS_ROW_LINK.format(
+                                        row_class="",
+                                        file_link=page_relpath,
+                                        link_name=sample_name))
+                    # Second Col: Status (color-coded)
+                    html_file.write(STATUS_ROW_VALUE.format(
+                                        row_class=button_class,
+                                        value=flag))
+                    # Third Col: Log File (w/ link to file)
+                    single_sample = all_samples[all_samples['sample_name'] == sample_name]
+                    if single_sample.empty:
+                        # When there is no objects.tsv file, search for the
+                        # presence of log, profile, and command files
+                        log_name = os.path.basename(str(glob.glob(os.path.join(
+                                    self.prj.metadata.results_subdir,
+                                    sample_name, '*log.md'))[0]))
+                        # Currently unused. Future?
+                        # profile_name = os.path.basename(str(glob.glob(os.path.join(
+                                            # self.prj.metadata.results_subdir,
+                                            # sample_name, '*profile.tsv'))[0]))
+                        # command_name = os.path.basename(str(glob.glob(os.path.join(
+                                            # self.prj.metadata.results_subdir,
+                                            # sample_name, '*commands.sh'))[0]))
+                    else:
+                        log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
+                        # Currently unused. Future?
+                        # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
+                        # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
+                    log_file = os.path.join(self.prj.metadata.results_subdir,
+                                            sample_name, log_name)
+                    log_relpath = os.path.relpath(log_file, reports_dir)
+                    if os.path.isfile(log_file):
+                        html_file.write(STATUS_ROW_LINK.format(
+                                            row_class="",
+                                            file_link=log_relpath,
+                                            link_name=log_name))
+                    else:
+                        # Leave cell empty
+                        html_file.write(STATUS_ROW_LINK.format(
+                                            row_class="",
+                                            file_link="",
+                                            link_name=""))
+                    # Fourth Col: Sample runtime (if completed)
+                    # If Completed, use stats.tsv
+                    stats_file = os.path.join(
+                                    self.prj.metadata.results_subdir,
+                                    sample_name, "stats.tsv")
+                    if os.path.isfile(stats_file):
+                        t = _pd.read_table(stats_file, header=None,
+                                           names=['key', 'value', 'pl'])
+                        t.drop_duplicates(subset=['key', 'pl'],
+                                          keep='last', inplace=True)
+                        try:
+                            time = str(t[t['key'] == 'Time'].iloc[0]['value'])
+                            html_file.write(STATUS_ROW_VALUE.format(
+                                            row_class="",
+                                            value=str(time)))
+                        except IndexError:
+                            warning = True                       
+                    else:
+                        html_file.write(STATUS_ROW_VALUE.format(
+                                            row_class=button_class,
+                                            value="Unknown"))
+                    html_file.write(STATUS_ROW_FOOTER)
+
+                html_file.write(STATUS_FOOTER)
+                html_file.write(HTML_FOOTER)
+                html_file.close()
+                if warning:
+                    _LOGGER.warn("The stats_summary.tsv file is incomplete")
+
         def create_navbar(objs, wd):
-            # Return a string containing the navbar prebuilt html
-            # Includes link to all the pages
-            objs_html_path = "{root}_summary.html".format(
+            """
+            Return a string containing the navbar prebuilt html.
+            Generates links to each page relative to the directory
+            of interest.
+            
+            :param pandas.DataFrame objs: project results dataframe containing
+                sample or object data
+            :param path wd: the working directory of the current HTML page 
+                being generated, enables navbar links relative to page
+            """
+
+            # Generate full index.html path
+            index_html_path = "{root}_summary.html".format(
                 root=os.path.join(self.prj.metadata.output_dir, self.prj.name))
             reports_dir = os.path.join(self.prj.metadata.output_dir,
                                        "reports")
-            index_page_relpath = os.path.relpath(objs_html_path, wd)
+            # Generate index.html path relative to the HTML file under 
+            # construction
+            index_page_relpath = os.path.relpath(index_html_path, wd)
             navbar_header = NAVBAR_HEADER.format(logo=NAVBAR_LOGO,
                                                  index_html=index_page_relpath)
-            # Add link to STATUS page
+            # Add link to status.html page
             status_page = os.path.join(reports_dir, "status.html")
             # Use relative linking structure
             relpath = os.path.relpath(status_page, wd)
@@ -1061,8 +1094,8 @@ def create_navbar(objs, wd):
                                NAVBAR_FOOTER]))
 
         def create_project_objects():
-            # If a protocol produces project level summaries add those as
-            # additional figures/links
+            """ Add project level summaries as additional figures/links """
+
             all_protocols = [sample.protocol for sample in self.prj.samples]
             # For each protocol report the project summarizers' results
             for protocol in set(all_protocols):
@@ -1141,19 +1174,27 @@ def create_project_objects():
                                    OBJECTS_LIST_FOOTER]))
 
         def create_index_html(objs, stats):
-            # Generate an index.html style project home page w/ sample summary
-            # statistics
+            """
+            Generate an index.html style project home page w/ sample summary
+            statistics
+            
+            :param pandas.DataFrame objs: project level dataframe containing
+                any reported objects for all samples
+            :param list stats: a summary file of pipeline statistics for each
+                analyzed sample                
+            """
+
             objs.drop_duplicates(keep='last', inplace=True)
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
-            # Generate parent index.html page
-            objs_html_path = "{root}_summary.html".format(
+            # Generate parent index.html page path
+            index_html_path = "{root}_summary.html".format(
                 root=os.path.join(self.prj.metadata.output_dir, self.prj.name))
-            # Generate parent objects.html page
+            # Generate parent objects.html page path
             object_parent_path = os.path.join(reports_dir, "objects.html")
-            # Generate parent samples.html page
+            # Generate parent samples.html page path
             sample_parent_path = os.path.join(reports_dir, "samples.html")
 
-            objs_html_file = open(objs_html_path, 'w')
+            objs_html_file = open(index_html_path, 'w')
             objs_html_file.write(HTML_HEAD_OPEN)
             objs_html_file.write("\t\t<style>\n")
             objs_html_file.write(TABLE_STYLE_ROTATED_HEADER)
@@ -1175,7 +1216,8 @@ def create_index_html(objs, stats):
             objs_html_file.write(HTML_BUTTON.format(
                 file_path=stats_relpath, label="Stats Summary File"))
 
-            # Add stats summary table to index page
+            # Add stats summary table to index page and produce individual
+            # sample pages
             if os.path.isfile(tsv_outfile_path):
                 objs_html_file.write(TABLE_HEADER)
                 # Produce table columns
@@ -1229,6 +1271,7 @@ def create_index_html(objs, stats):
                 objs_html_file.write(TABLE_FOOTER)
             else:
                 _LOGGER.warn("No stats file '%s'", stats_file)
+
             # Create parent samples page with links to each sample
             create_sample_parent_html(objs)
 
@@ -1260,9 +1303,7 @@ def create_index_html(objs, stats):
         create_index_html(objs, stats)
 
 def uniqify(seq):
-    """
-    Fast way to uniqify while preserving input order.
-    """
+    """ Fast way to uniqify while preserving input order. """
     # http://stackoverflow.com/questions/480214/
     seen = set()
     seen_add = seen.add

From 200a1a9dce97509dba5d1c84e22b429f9c9e1f0d Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 14:23:14 -0400
Subject: [PATCH 04/35] Handle missing samples that failed pipeline submission

---
 looper/html_reports.py | 480 ++++++++++++++++++++++-------------------
 1 file changed, 259 insertions(+), 221 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 7abf5333d..2ba0ba85e 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -623,11 +623,15 @@ def create_sample_parent_html(all_samples):
                 html_file.write(GENERIC_LIST_HEADER)
                 for sample in self.prj.samples:
                     sample_name = str(sample.sample_name)
-                    page_name = sample_name + ".html"
-                    page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
-                    page_relpath = os.path.relpath(page_path, reports_dir)
-                    html_file.write(GENERIC_LIST_ENTRY.format(
-                                    page=page_relpath, label=sample_name))
+                    sample_dir = os.path.join(
+                            self.prj.metadata.results_subdir, sample_name)
+                    # Confirm sample directory exists, then add its link
+                    if os.path.exists(sample_dir):   
+                        page_name = sample_name + ".html"
+                        page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
+                        page_relpath = os.path.relpath(page_path, reports_dir)
+                        html_file.write(GENERIC_LIST_ENTRY.format(
+                                        page=page_relpath, label=sample_name))
                 html_file.write(HTML_FOOTER)
                 html_file.close()
 
@@ -736,142 +740,151 @@ def create_sample_html(all_samples, sample_name, sample_stats):
                 html_file.write(create_navbar(all_samples, reports_dir))
                 html_file.write(HTML_HEAD_CLOSE)
                 html_file.write("\t\t<h4>{}</h4>\n".format(str(sample_name)))
-                if single_sample.empty:
-                    # When there is no objects.tsv file, search for the
-                    # presence of log, profile, and command files
-                    log_name = os.path.basename(str(glob.glob(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, '*log.md'))[0]))
-                    profile_name = os.path.basename(str(glob.glob(os.path.join(
-                                        self.prj.metadata.results_subdir,
-                                        sample_name, '*profile.tsv'))[0]))
-                    command_name = os.path.basename(str(glob.glob(os.path.join(
+                sample_dir = os.path.join(
+                        self.prj.metadata.results_subdir, sample_name)
+                # Confirm sample directory exists, then build page
+                if os.path.exists(sample_dir):                    
+                    if single_sample.empty:
+                        # When there is no objects.tsv file, search for the
+                        # presence of log, profile, and command files
+                        log_name = os.path.basename(str(glob.glob(os.path.join(
                                         self.prj.metadata.results_subdir,
-                                        sample_name, '*commands.sh'))[0]))
-                else:
-                    log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
-                    profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
-                    command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
-                # Get relative path to the log file
-                log_file = os.path.join(self.prj.metadata.results_subdir,
-                                        sample_name, log_name)
-                log_relpath = os.path.relpath(log_file, reports_dir)
-                # Grab the status flag for the current sample
-                flag = glob.glob(os.path.join(self.prj.metadata.results_subdir,
-                                              sample_name, '*.flag'))
-                if not flag:  
-                    button_class = "btn btn-danger"
-                    flag = "Missing"
-                    _LOGGER.warn("create_sample_html: No flag file found for {}".format(sample_name))
-                elif len(flag) > 1:
-                    button_class = "btn btn-warning"
-                    flag = "Multiple"
-                    _LOGGER.warn("create_sample_html: Multiple flag files found for {}".format(sample_name))
-                else:
-                    if "completed" in str(flag):
-                        button_class = "btn btn-success"
-                        flag = "Completed"
-                    elif "running" in str(flag):
-                        button_class = "btn btn-warning"
-                        flag = "Running"
-                    elif "failed" in str(flag):
-                        button_class = "btn btn-danger"
-                        flag = "Failed"
+                                        sample_name, '*log.md'))[0]))
+                        profile_name = os.path.basename(str(glob.glob(os.path.join(
+                                            self.prj.metadata.results_subdir,
+                                            sample_name, '*profile.tsv'))[0]))
+                        command_name = os.path.basename(str(glob.glob(os.path.join(
+                                            self.prj.metadata.results_subdir,
+                                            sample_name, '*commands.sh'))[0]))
                     else:
-                        button_class = "btn btn-secondary"
-                        flag = "Unknown"
-                # Create buttons linking the sample's STATUS, LOG, PROFILE,
-                # COMMANDS, and STATS files
-                stats_relpath = os.path.relpath(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, "stats.tsv"), reports_dir)
-                profile_relpath = os.path.relpath(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, profile_name), reports_dir)
-                command_relpath = os.path.relpath(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, command_name), reports_dir)
-                html_file.write(SAMPLE_BUTTONS.format(
-                                    button_class=button_class,
-                                    flag=flag,
-                                    log_file=log_relpath,
-                                    profile_file=profile_relpath,
-                                    commands_file=command_relpath,
-                                    stats_file=stats_relpath))
-
-                # Add the sample's statistics as a table
-                html_file.write("\t<div class='container-fluid'>\n")
-                html_file.write(SAMPLE_TABLE_HEADER)
-                # Produce table rows
-                for key, value in sample_stats.items():
-                    # Treat sample_name as a link to sample page
-                    if key == 'sample_name':
-                        page_relpath = os.path.relpath(html_page, reports_dir)
-                        html_file.write(SAMPLE_TABLE_FIRSTROW.format(
-                                            row_name=str(key),
-                                            html_page=page_relpath,
-                                            page_name=html_filename,
-                                            link_name=str(value)))
-                    # Otherwise add as a static cell value
+                        log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
+                        profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
+                        command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
+                    # Get relative path to the log file
+                    log_file = os.path.join(self.prj.metadata.results_subdir,
+                                            sample_name, log_name)
+                    log_relpath = os.path.relpath(log_file, reports_dir)
+                    # Grab the status flag for the current sample
+                    flag = glob.glob(os.path.join(self.prj.metadata.results_subdir,
+                                                  sample_name, '*.flag'))
+                    if not flag:  
+                        button_class = "btn btn-danger"
+                        flag = "Missing"
+                        _LOGGER.warn("create_sample_html: No flag file found for {}".format(sample_name))
+                    elif len(flag) > 1:
+                        button_class = "btn btn-warning"
+                        flag = "Multiple"
+                        _LOGGER.warn("create_sample_html: Multiple flag files found for {}".format(sample_name))
                     else:
-                        html_file.write(SAMPLE_TABLE_ROW.format(
-                            row_name=str(key),
-                            row_val=str(value)))
-
-                html_file.write(TABLE_FOOTER)
-                html_file.write("\t  <hr>\n")
-                # Add all the objects for the current sample
-                html_file.write("\t\t<div class='container-fluid'>\n")
-                html_file.write("\t\t<h5>{sample} objects</h5>\n".format(sample=sample_name))
-                links = []
-                figures = []
-                warnings = []
-                for sample_name in single_sample['sample_name'].drop_duplicates().sort_values():
-                    o = single_sample[single_sample['sample_name'] == sample_name]
-                    for i, row in o.iterrows():
-                        image_path = os.path.join(
+                        if "completed" in str(flag):
+                            button_class = "btn btn-success"
+                            flag = "Completed"
+                        elif "running" in str(flag):
+                            button_class = "btn btn-warning"
+                            flag = "Running"
+                        elif "failed" in str(flag):
+                            button_class = "btn btn-danger"
+                            flag = "Failed"
+                        else:
+                            button_class = "btn btn-secondary"
+                            flag = "Unknown"
+                    # Create buttons linking the sample's STATUS, LOG, PROFILE,
+                    # COMMANDS, and STATS files
+                    stats_relpath = os.path.relpath(os.path.join(
                                         self.prj.metadata.results_subdir,
-                                        sample_name, row['anchor_image'])
-                        image_relpath = os.path.relpath(image_path, reports_dir)
-                        page_path = os.path.join(
+                                        sample_name, "stats.tsv"), reports_dir)
+                    profile_relpath = os.path.relpath(os.path.join(
                                         self.prj.metadata.results_subdir,
-                                        sample_name, row['filename'])
-                        page_relpath = os.path.relpath(page_path, reports_dir)
-                        # If the object has a thumbnail image, add as a figure
-                        if os.path.isfile(image_path) and os.path.isfile(page_path):
-                            # If the object has a valid image, add as a figure
-                            if str(image_path).lower().endswith(('.png', '.jpg', '.jpeg', '.svg', '.gif')):
-                                figures.append(SAMPLE_PLOTS.format(
-                                                label=str(row['key']),
-                                                path=page_relpath,
-                                                image=image_relpath))
-                            # Otherwise treat as a link
+                                        sample_name, profile_name), reports_dir)
+                    command_relpath = os.path.relpath(os.path.join(
+                                        self.prj.metadata.results_subdir,
+                                        sample_name, command_name), reports_dir)
+                    html_file.write(SAMPLE_BUTTONS.format(
+                                        button_class=button_class,
+                                        flag=flag,
+                                        log_file=log_relpath,
+                                        profile_file=profile_relpath,
+                                        commands_file=command_relpath,
+                                        stats_file=stats_relpath))
+
+                    # Add the sample's statistics as a table
+                    html_file.write("\t<div class='container-fluid'>\n")
+                    html_file.write(SAMPLE_TABLE_HEADER)
+                    # Produce table rows
+                    for key, value in sample_stats.items():
+                        # Treat sample_name as a link to sample page
+                        if key == 'sample_name':
+                            page_relpath = os.path.relpath(html_page, reports_dir)
+                            html_file.write(SAMPLE_TABLE_FIRSTROW.format(
+                                                row_name=str(key),
+                                                html_page=page_relpath,
+                                                page_name=html_filename,
+                                                link_name=str(value)))
+                        # Otherwise add as a static cell value
+                        else:
+                            html_file.write(SAMPLE_TABLE_ROW.format(
+                                row_name=str(key),
+                                row_val=str(value)))
+
+                    html_file.write(TABLE_FOOTER)
+                    html_file.write("\t  <hr>\n")
+                    # Add all the objects for the current sample
+                    html_file.write("\t\t<div class='container-fluid'>\n")
+                    html_file.write("\t\t<h5>{sample} objects</h5>\n".format(sample=sample_name))
+                    links = []
+                    figures = []
+                    warnings = []
+                    for sample_name in single_sample['sample_name'].drop_duplicates().sort_values():
+                        o = single_sample[single_sample['sample_name'] == sample_name]
+                        for i, row in o.iterrows():
+                            image_path = os.path.join(
+                                            self.prj.metadata.results_subdir,
+                                            sample_name, row['anchor_image'])
+                            image_relpath = os.path.relpath(image_path, reports_dir)
+                            page_path = os.path.join(
+                                            self.prj.metadata.results_subdir,
+                                            sample_name, row['filename'])
+                            page_relpath = os.path.relpath(page_path, reports_dir)
+                            # If the object has a thumbnail image, add as a figure
+                            if os.path.isfile(image_path) and os.path.isfile(page_path):
+                                # If the object has a valid image, add as a figure
+                                if str(image_path).lower().endswith(('.png', '.jpg', '.jpeg', '.svg', '.gif')):
+                                    figures.append(SAMPLE_PLOTS.format(
+                                                    label=str(row['key']),
+                                                    path=page_relpath,
+                                                    image=image_relpath))
+                                # Otherwise treat as a link
+                                elif os.path.isfile(page_path):
+                                    links.append(GENERIC_LIST_ENTRY.format(
+                                                    label=str(row['key']),
+                                                    page=page_relpath))
+                                # If neither, there is no object by that name
+                                else:
+                                    warnings.append(str(row['filename']))
+                            # If no thumbnail image, it's just a link
                             elif os.path.isfile(page_path):
                                 links.append(GENERIC_LIST_ENTRY.format(
                                                 label=str(row['key']),
                                                 page=page_relpath))
-                            # If neither, there is no object by that name
+                            # If no file present, there is no object by that name
                             else:
                                 warnings.append(str(row['filename']))
-                        # If no thumbnail image, it's just a link
-                        elif os.path.isfile(page_path):
-                            links.append(GENERIC_LIST_ENTRY.format(
-                                            label=str(row['key']),
-                                            page=page_relpath))
-                        # If no file present, there is no object by that name
-                        else:
-                            warnings.append(str(row['filename']))
-
-                html_file.write(GENERIC_LIST_HEADER)
-                html_file.write("\n".join(links))
-                html_file.write(GENERIC_LIST_FOOTER)
-                html_file.write("\t\t\t<hr>\n")
-                html_file.write("\n".join(figures))
-                html_file.write("\t\t</div>\n")
-                html_file.write("\t\t<hr>\n")
-                html_file.write(HTML_FOOTER)
-                html_file.close()
 
+                    html_file.write(GENERIC_LIST_HEADER)
+                    html_file.write("\n".join(links))
+                    html_file.write(GENERIC_LIST_FOOTER)
+                    html_file.write("\t\t\t<hr>\n")
+                    html_file.write("\n".join(figures))
+                    html_file.write("\t\t</div>\n")
+                    html_file.write("\t\t<hr>\n")
+                    html_file.write(HTML_FOOTER)
+                    html_file.close()
+                else:
+                    # Sample was not run through the pipeline
+                    _LOGGER.warn("{} is not present in {}".format(
+                        sample_name, self.prj.metadata.results_subdir))
+                    html_file.write(HTML_FOOTER)
+                    html_file.close()
             # TODO: accumulate warnings from these functions and only display
             #       after all samples are processed
             # _LOGGER.warn("Warning: The following files do not exist: " +
@@ -898,111 +911,135 @@ def create_status_html(all_samples):
                 html_file.write(HTML_HEAD_CLOSE)
                 html_file.write(STATUS_HEADER)
                 html_file.write(STATUS_TABLE_HEAD)
-                warning = False
+                # Alert user if the stats_summary.tsv is incomplete
+                # Likely indicates pipeline is still running
+                stats_warning = False
+                # Alert user to samples that are included in the project
+                # but have not been run
+                sample_warning = []
                 for sample in self.prj.samples:
                     sample_name = str(sample.sample_name)
-                    # Grab the status flag for the current sample
-                    flag = glob.glob(os.path.join(
-                                        self.prj.metadata.results_subdir,
-                                        sample_name, '*.flag'))               
-                    if not flag:
-                        button_class = "table-danger"
-                        flag = "Missing"
-                        _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name))
-                    elif len(flag) > 1:
-                        button_class = "table-warning"
-                        flag = "Multiple"
-                        _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name))
-                    else:
-                        if "completed" in str(flag):
-                            button_class = "table-success"
-                            flag = "Completed"
-                        elif "running" in str(flag):
-                            button_class = "table-warning"
-                            flag = "Running"
-                        elif "failed" in str(flag):
+                    sample_dir = os.path.join(
+                            self.prj.metadata.results_subdir, sample_name)
+                    # Confirm sample directory exists, then build its table row
+                    _LOGGER.info("sample_dir: " + str(sample_dir))
+                    if os.path.exists(sample_dir):                        
+                        # Grab the status flag for the current sample
+                        flag = glob.glob(os.path.join(sample_dir, '*.flag'))               
+                        if not flag:
                             button_class = "table-danger"
-                            flag = "Failed"
+                            flag = "Missing"
+                            _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name))
+                        elif len(flag) > 1:
+                            button_class = "table-warning"
+                            flag = "Multiple"
+                            _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name))
                         else:
-                            button_class = "table-secondary"
-                            flag = "Unknown"
-
-                    # Create table entry for each sample
-                    html_file.write(STATUS_ROW_HEADER)
-                    # First Col: Sample_Name (w/ link to sample page)
-                    page_name = sample_name + ".html"
-                    page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
-                    page_relpath = os.path.relpath(page_path, reports_dir)
-                    html_file.write(STATUS_ROW_LINK.format(
-                                        row_class="",
-                                        file_link=page_relpath,
-                                        link_name=sample_name))
-                    # Second Col: Status (color-coded)
-                    html_file.write(STATUS_ROW_VALUE.format(
-                                        row_class=button_class,
-                                        value=flag))
-                    # Third Col: Log File (w/ link to file)
-                    single_sample = all_samples[all_samples['sample_name'] == sample_name]
-                    if single_sample.empty:
-                        # When there is no objects.tsv file, search for the
-                        # presence of log, profile, and command files
-                        log_name = os.path.basename(str(glob.glob(os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, '*log.md'))[0]))
-                        # Currently unused. Future?
-                        # profile_name = os.path.basename(str(glob.glob(os.path.join(
-                                            # self.prj.metadata.results_subdir,
-                                            # sample_name, '*profile.tsv'))[0]))
-                        # command_name = os.path.basename(str(glob.glob(os.path.join(
-                                            # self.prj.metadata.results_subdir,
-                                            # sample_name, '*commands.sh'))[0]))
-                    else:
-                        log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
-                        # Currently unused. Future?
-                        # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
-                        # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
-                    log_file = os.path.join(self.prj.metadata.results_subdir,
-                                            sample_name, log_name)
-                    log_relpath = os.path.relpath(log_file, reports_dir)
-                    if os.path.isfile(log_file):
-                        html_file.write(STATUS_ROW_LINK.format(
-                                            row_class="",
-                                            file_link=log_relpath,
-                                            link_name=log_name))
-                    else:
-                        # Leave cell empty
+                            if "completed" in str(flag):
+                                button_class = "table-success"
+                                flag = "Completed"
+                            elif "running" in str(flag):
+                                button_class = "table-warning"
+                                flag = "Running"
+                            elif "failed" in str(flag):
+                                button_class = "table-danger"
+                                flag = "Failed"
+                            else:
+                                button_class = "table-secondary"
+                                flag = "Unknown"
+
+                        # Create table entry for each sample
+                        html_file.write(STATUS_ROW_HEADER)
+                        # First Col: Sample_Name (w/ link to sample page)
+                        page_name = sample_name + ".html"
+                        page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower())
+                        page_relpath = os.path.relpath(page_path, reports_dir)
                         html_file.write(STATUS_ROW_LINK.format(
                                             row_class="",
-                                            file_link="",
-                                            link_name=""))
-                    # Fourth Col: Sample runtime (if completed)
-                    # If Completed, use stats.tsv
-                    stats_file = os.path.join(
-                                    self.prj.metadata.results_subdir,
-                                    sample_name, "stats.tsv")
-                    if os.path.isfile(stats_file):
-                        t = _pd.read_table(stats_file, header=None,
-                                           names=['key', 'value', 'pl'])
-                        t.drop_duplicates(subset=['key', 'pl'],
-                                          keep='last', inplace=True)
-                        try:
-                            time = str(t[t['key'] == 'Time'].iloc[0]['value'])
-                            html_file.write(STATUS_ROW_VALUE.format(
-                                            row_class="",
-                                            value=str(time)))
-                        except IndexError:
-                            warning = True                       
-                    else:
+                                            file_link=page_relpath,
+                                            link_name=sample_name))
+                        # Second Col: Status (color-coded)
                         html_file.write(STATUS_ROW_VALUE.format(
                                             row_class=button_class,
-                                            value="Unknown"))
-                    html_file.write(STATUS_ROW_FOOTER)
+                                            value=flag))
+                        # Third Col: Log File (w/ link to file)
+                        single_sample = all_samples[all_samples['sample_name'] == sample_name]
+                        if single_sample.empty:
+                            # When there is no objects.tsv file, search for the
+                            # presence of log, profile, and command files
+                            log_name = os.path.basename(str(glob.glob(os.path.join(
+                                        self.prj.metadata.results_subdir,
+                                        sample_name, '*log.md'))[0]))
+                            # Currently unused. Future?
+                            # profile_name = os.path.basename(str(glob.glob(os.path.join(
+                                                # self.prj.metadata.results_subdir,
+                                                # sample_name, '*profile.tsv'))[0]))
+                            # command_name = os.path.basename(str(glob.glob(os.path.join(
+                                                # self.prj.metadata.results_subdir,
+                                                # sample_name, '*commands.sh'))[0]))
+                        else:
+                            log_name = str(single_sample.iloc[0]['annotation']) + "_log.md"
+                            # Currently unused. Future?
+                            # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv"
+                            # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh"
+                        log_file = os.path.join(self.prj.metadata.results_subdir,
+                                                sample_name, log_name)
+                        log_relpath = os.path.relpath(log_file, reports_dir)
+                        if os.path.isfile(log_file):
+                            html_file.write(STATUS_ROW_LINK.format(
+                                                row_class="",
+                                                file_link=log_relpath,
+                                                link_name=log_name))
+                        else:
+                            # Leave cell empty
+                            html_file.write(STATUS_ROW_LINK.format(
+                                                row_class="",
+                                                file_link="",
+                                                link_name=""))
+                        # Fourth Col: Sample runtime (if completed)
+                        # If Completed, use stats.tsv
+                        stats_file = os.path.join(
+                                        self.prj.metadata.results_subdir,
+                                        sample_name, "stats.tsv")
+                        if os.path.isfile(stats_file):
+                            t = _pd.read_table(stats_file, header=None,
+                                               names=['key', 'value', 'pl'])
+                            t.drop_duplicates(subset=['key', 'pl'],
+                                              keep='last', inplace=True)
+                            try:
+                                time = str(t[t['key'] == 'Time'].iloc[0]['value'])
+                                html_file.write(STATUS_ROW_VALUE.format(
+                                                row_class="",
+                                                value=str(time)))
+                            except IndexError:
+                                stats_warning = True                       
+                        else:
+                            html_file.write(STATUS_ROW_VALUE.format(
+                                                row_class=button_class,
+                                                value="Unknown"))
+                        html_file.write(STATUS_ROW_FOOTER)
+                    else:
+                        # Sample was not run through the pipeline
+                        sample_warning.append(sample_name)
 
+                # Close HTML file
                 html_file.write(STATUS_FOOTER)
                 html_file.write(HTML_FOOTER)
                 html_file.close()
-                if warning:
+                
+                # Alert the user to any warnings generated
+                if stats_warning:
                     _LOGGER.warn("The stats_summary.tsv file is incomplete")
+                if sample_warning:
+                    if len(sample_warning)==1:
+                        _LOGGER.warn("{} is not present in {}".format(
+                            ''.join(str(sample) for sample in sample_warning),
+                            self.prj.metadata.results_subdir))
+                    else:
+                        warn_msg = "The following samples are not present in {}: {}"
+                        _LOGGER.warn(warn_msg.format(
+                            self.prj.metadata.results_subdir,
+                            ' '.join(str(sample) for sample in sample_warning)))
 
         def create_navbar(objs, wd):
             """
@@ -1299,6 +1336,7 @@ def create_index_html(objs, stats):
             _LOGGER.info(
                 "Summary (n=" + str(len(stats)) + "): " + tsv_outfile_path)
 
+        _LOGGER.info("create_index_html")
         # Generate HTML report
         create_index_html(objs, stats)
 

From bbc61e87587ec52214979c89e001c8838298eff5 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 14:37:43 -0400
Subject: [PATCH 05/35] Produce plots of per command runtime for each sample in
 a project

---
 looper_runtime_plot.R | 259 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 259 insertions(+)
 create mode 100755 looper_runtime_plot.R

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
new file mode 100755
index 000000000..680b7b35e
--- /dev/null
+++ b/looper_runtime_plot.R
@@ -0,0 +1,259 @@
+#! /usr/bin/env Rscript
+###############################################################################
+#06/04/18
+#Last Updated 06/21/18
+#Original Author: Jason Smith
+#looper_runtime_plot.R
+#
+#This program is meant to plot a comparison of total runtime to a breakdown
+#of the runtime for each pipeline subcommand
+#
+#NOTES:
+#usage: Rscript /path/to/Rscript/looper_runtime_plot.R 
+#       /path/to/project_config.yaml
+#
+#requirements: argparser, ggplot2, grid, stringr, pepr
+#
+###############################################################################
+####                              DEPENDENCIES                             ####
+###############################################################################
+##### LOAD ARGUMENTPARSER #####
+if(suppressPackageStartupMessages(!require(argparser))) {
+    install.packages("argparser")
+}
+suppressPackageStartupMessages(library(argparser, quietly=TRUE))
+
+# Create a parser
+p <- arg_parser("Produce an ATACseq pipeline (PEPATAC) runtime plot")
+
+# Add command line arguments
+p <- add_argument(p, "config", 
+                  help="PEPATAC project_config.yaml")
+# p <- add_argument(p, "--output", 
+                  # help="PNG or PDF",
+                  # default = "PDF")
+
+# Parse the command line arguments
+argv <- parse_args(p)
+
+##### LOAD ADDITIONAL DEPENDENCIES #####
+warnSetting <- getOption("warn")
+options(warn = -1)
+if(suppressPackageStartupMessages(!require(ggplot2))) {
+    install.packages("ggplot2")
+}
+if(suppressPackageStartupMessages(!require(grid))) {
+    install.packages("grid")
+}
+if(suppressPackageStartupMessages(!require(stringr))) {
+    install.packages("stringr")
+}
+if(suppressPackageStartupMessages(!require(pepr))) {
+    devtools::install_github("pepkit/pepr")
+}
+suppressPackageStartupMessages(library(ggplot2))
+suppressPackageStartupMessages(library(grid))
+suppressPackageStartupMessages(library(stringr))
+suppressPackageStartupMessages(library(pepr))
+options(warn = warnSetting)
+
+###############################################################################
+####                               FUNCTIONS                               ####
+###############################################################################
+# Convert "H:M:S", "M:S", or "S" strings to seconds (one number per entry)
+toSeconds <- function(HMS){
+    if (!is.character(HMS)) {
+        stop("HMS must be a character string of the form H:M:S")
+    }
+    if (length(HMS)<=0) return(HMS)
+    # Split one timestamp on ':' and weight its fields by position; a
+    # timestamp with zero or more than three fields contributes nothing
+    parseStamp <- function(stamp){
+        f <- as.numeric(strsplit(stamp,':',fixed=TRUE)[[1]])
+        switch(as.character(length(f)),
+               "3" = f[1]*3600 + f[2]*60 + f[3],
+               "2" = f[1]*60 + f[2],
+               "1" = f[1])
+    }
+    unlist(lapply(HMS, parseStamp))
+} 
+
+# Convert seconds back to HMS format, keeping up to `digits` decimal places
+secondsToString <- function(secs, digits=2){
+    formatOne <- function(i){
+        # Round first so carries propagate; floor() keeps the fraction >= 0
+        i   <- round(i, digits)
+        fs  <- as.integer(round((i - floor(i))*(10^digits)))
+        if (i >= 3600)    {fmt <- '%H:%M:%S'}
+        else if (i >= 60) {fmt <- '%M:%S'}
+        else              {fmt <- '%OS'}
+        hms <- format(as.POSIXct(strptime("0:0:0","%H:%M:%S")) + floor(i),
+                      format=fmt)
+        if (fs <= 0) {return(hms)}
+        # Zero-pad the fraction (.05 must not print as .5), then trim zeros
+        sub('[0]+$','',paste(hms,formatC(fs,width=digits,flag="0"),sep='.'))
+    }
+    unlist(lapply(secs, formatOne))
+}
+
+# Taken from https://github.com/baptiste/egg/blob/master/R/set_panel_size.r
+set_panel_size <- function(p=NULL, g=ggplotGrob(p), file=NULL, 
+                           margin=unit(1, "in"),
+                           width=unit(4, "in"), 
+                           height=unit(4, "in")){
+    # Force each plot panel to `width` x `height`; save to `file` if given
+    panels <- grep("panel", g$layout$name)
+    panel_index_w <- unique(g$layout$l[panels])
+    panel_index_h <- unique(g$layout$t[panels])
+    nw <- length(panel_index_w)
+    nh <- length(panel_index_h)
+    # Overwrite the sizes of the columns (l) and rows (t) that hold panels
+    if(getRversion() < "3.3.0"){
+        
+        # the following conversion is necessary
+        # because there is no `[<-`.unit method
+        # so promoting to unit.list allows standard list indexing
+        g$widths  <- grid:::unit.list(g$widths)
+        g$heights <- grid:::unit.list(g$heights)
+        
+        g$widths[panel_index_w]  <- rep(list(width), nw)
+        g$heights[panel_index_h] <- rep(list(height), nh)
+        
+    } else {
+        # From R 3.3.0 on, the unit vectors are assigned by index directly
+        g$widths[panel_index_w]  <- rep(width, nw)
+        g$heights[panel_index_h] <- rep(height, nh)
+        
+    }
+    # Saved figure size: sum of all gtable sizes plus `margin`, in inches
+    if(!is.null(file))
+        ggsave(file, g, limitsize = FALSE,
+               width=convertWidth(sum(g$widths) + margin, 
+                                  unitTo="in", valueOnly=TRUE),
+               height=convertHeight(sum(g$heights) + margin,  
+                                    unitTo="in", valueOnly=TRUE))
+    # Return the resized gtable without printing it
+    invisible(g)
+}
+
+# Build <output_dir>/results_pipeline/<sampleName>/<sampleName><suffix>,
+# taking output_dir from the metadata section of the project's config
+buildFilePath = function(sampleName, suffix, pep=prj) {
+    invisible(capture.output(outputDir <- config(pep)$metadata$output_dir))
+    fileName <- paste0(sampleName, suffix)
+    file.path(outputDir, "results_pipeline", sampleName, fileName)
+}
+
+# Collapse each run of consecutive rows sharing the same command (column 1)
+# into one row whose value (column 2) is the sum over the run
+dedupSequential = function(dupDF) {
+    numRows <- nrow(dupDF)
+    # A row closes a run when the next command differs; the last always does
+    dupList <- dupDF[c(tail(dupDF[,1],-1) != head(dupDF[,1],-1), TRUE),][,1]
+    dedupDF <- data.frame(cmd=character(length(dupList)),
+                          val=numeric(length(dupList)),
+                          stringsAsFactors=FALSE)
+    currentPos <- 1
+    counter    <- 1
+    while (counter <= numRows) {
+        currentCmd <- dupDF[counter, 1]
+        total      <- dupDF[counter, 2]
+        # Absorb the rest of the run; testing the bound first keeps the
+        # lookahead inside the table and lets a run end on the last row
+        while (counter < numRows && dupDF[counter + 1, 1] == currentCmd) {
+            counter <- counter + 1
+            total   <- total + dupDF[counter, 2]
+        }
+        dedupDF[currentPos, 1] <- currentCmd
+        dedupDF[currentPos, 2] <- total
+        currentPos <- currentPos + 1
+        counter    <- counter + 1
+    }
+    return (dedupDF)
+}
+
+# Plot each command's runtime plus the total for one sample (PDF and PNG)
+plotRuntime = function(timeFile, sampleName) {
+    # Get just the first line to get pipeline start time
+    startTime  <- readLines(timeFile, n=1)
+
+    # Keep only the last space-separated word: the start timestamp
+    startTime  <- word(startTime, -1, sep=" ")
+
+    # Get the run times for each pipeline command
+    # Skip the first two lines; text after a '#' is treated as a comment
+    timeStamps <- read.delim2(timeFile, skip=2, header = FALSE,
+                              as.is=TRUE, comment.char = '#')
+    # Remove leading directory structure, then drop columns 2 and 4
+    # NOTE(review): 1:nrow() iterates c(1, 0) if no rows were read - confirm
+    for (i in 1:nrow(timeStamps)) {
+        timeStamps[i,1]  <- sub('.*\\/', '', timeStamps[i,1])   
+    }
+    timeStamps           <- timeStamps[,-c(2,4)]
+    colnames(timeStamps) <- c("cmd","time")
+    # Convert the H:M:S runtimes to seconds
+    timeStamps$time <- toSeconds(timeStamps$time)
+    
+    # Combine any of the same commands to get total time spent per command
+    # Eliminate only sequentially duplicated commands
+    combinedTime <- dedupSequential(timeStamps)
+    colnames(combinedTime) <- c("cmd", "time")
+    # Finish time = start timestamp + summed runtimes, formatted as H:M:S
+    totalTime       <- sum(combinedTime$time)
+    finishTime      <- secondsToString(toSeconds(startTime) + totalTime)
+    # Append the overall total as a final "totalTime" row
+    num.rows <- nrow(combinedTime)
+    combinedTime[num.rows+1, 1] <- "totalTime"
+    combinedTime[num.rows+1, 2] <- as.character(totalTime)
+    # The character total turned the column into strings; restore numbers
+    combinedTime$time  <- as.numeric(combinedTime$time)
+    combinedTime$cmd   <- as.character(combinedTime$cmd)
+    # Set order for plotting purposes
+    combinedTime$order <- as.factor(as.numeric(row.names(combinedTime)))
+
+    # Horizontal bars (coord_flip), one per row, labelled with the command
+    p <- ggplot(data=combinedTime, aes(x=order, y=time)) +
+                geom_bar(stat="identity", position=position_dodge())+
+                scale_fill_brewer(palette="Paired")+
+                theme_minimal() +
+                coord_flip() +
+                labs(y = paste("Time (s)\n", "[Start: ", startTime, " | ", 
+                               "End: ", finishTime, "]", sep=""),
+                     x = "PEPATAC Command") +
+                scale_x_discrete(labels=combinedTime$cmd) +
+                theme(plot.title = element_text(hjust = 0.5))
+    
+    # Produce both PDF and PNG
+    set_panel_size(
+        p, 
+        file=buildFilePath(sampleName, "_Runtime.pdf", prj), 
+        width=unit(8,"inches"), 
+        height=unit(5.5,"inches"))
+    set_panel_size(
+        p, 
+        file=buildFilePath(sampleName, "_Runtime.png", prj), 
+        width=unit(8,"inches"), 
+        height=unit(5.5,"inches"))
+}
+
+###############################################################################
+####                               OPEN FILE                               ####
+###############################################################################
+
+configFile <- argv$config
+prj = Project(configFile)
+
+###############################################################################
+####                                 MAIN                                  ####
+###############################################################################
+# For each sample in the project, produce a runtime summary plot
+invisible(capture.output(outputDir <- config(prj)$metadata$output_dir))
+invisible(capture.output(numSamples <- length(samples(prj)$sample_name)))
+for (i in 1:numSamples) {
+    invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
+    timeFile <- file.path(outputDir, "results_pipeline",
+                          sampleName, "ATACseq_profile.tsv")
+    plotRuntime(timeFile, sampleName)
+}
+
+write("Completed!\n", stdout())
\ No newline at end of file

From 4874c60a1ea978787ca532299a8b51095478b18d Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 26 Jun 2018 14:40:56 -0400
Subject: [PATCH 06/35] Eliminate redundancy in warning messaging

---
 looper/html_reports.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 2ba0ba85e..aeab8c319 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -770,11 +770,9 @@ def create_sample_html(all_samples, sample_name, sample_stats):
                     if not flag:  
                         button_class = "btn btn-danger"
                         flag = "Missing"
-                        _LOGGER.warn("create_sample_html: No flag file found for {}".format(sample_name))
                     elif len(flag) > 1:
                         button_class = "btn btn-warning"
                         flag = "Multiple"
-                        _LOGGER.warn("create_sample_html: Multiple flag files found for {}".format(sample_name))
                     else:
                         if "completed" in str(flag):
                             button_class = "btn btn-success"
@@ -929,11 +927,11 @@ def create_status_html(all_samples):
                         if not flag:
                             button_class = "table-danger"
                             flag = "Missing"
-                            _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name))
+                            _LOGGER.warn("No flag file found for {}".format(sample_name))
                         elif len(flag) > 1:
                             button_class = "table-warning"
                             flag = "Multiple"
-                            _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name))
+                            _LOGGER.warn("Multiple flag files found for {}".format(sample_name))
                         else:
                             if "completed" in str(flag):
                                 button_class = "table-success"

From d74655ad052eeb6d70bfb8c52b1e3c223cd31350 Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Tue, 26 Jun 2018 18:25:26 -0400
Subject: [PATCH 07/35] make looper logger the root so peppy attaches to it

---
 looper/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/looper/__init__.py b/looper/__init__.py
index 5d8704ade..0d5c484f1 100644
--- a/looper/__init__.py
+++ b/looper/__init__.py
@@ -58,7 +58,7 @@ def setup_looper_logger(level, additional_locations=None, devmode=False):
     fmt = DEV_LOGGING_FMT if devmode else DEFAULT_LOGGING_FMT
 
     # Establish the logger.
-    LOOPER_LOGGER = logging.getLogger("looper")
+    LOOPER_LOGGER = logging.getLogger()
     # First remove any previously-added handlers
     LOOPER_LOGGER.handlers = []
     LOOPER_LOGGER.propagate = False

From b9e15dd0386ff3c3f15b756f4a17055028b29920 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 07:41:44 -0400
Subject: [PATCH 08/35] Update HTMLReportBuilder to new-style class

---
 looper/html_reports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index aeab8c319..b1485486f 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -557,7 +557,7 @@
 __all__ = HTML_VARS + NAVBAR_VARS + GENERIC_VARS + \
           TABLE_VARS + SAMPLE_VARS + STATUS_VARS + OBJECTS_VARS
 
-class HTMLReportBuilder():
+class HTMLReportBuilder(object):
     """ Generate HTML summary report for project/samples """
 
     def __init__(self, prj):

From b00348525278b7b0738cf6f56dc20c0225b1b9e8 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 08:17:31 -0400
Subject: [PATCH 09/35] Fix object page naming

---
 looper/html_reports.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index b1485486f..bf5d94b3b 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -647,9 +647,10 @@ def create_object_html(single_object, all_objects):
             """
 
             reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
-            # Generate object type and filename
-            type = single_object['key'].drop_duplicates()
-            filename = str(type) + ".html"
+            # Generate object filename
+            for key in single_object['key'].drop_duplicates().sort_values():
+                type = str(key)
+                filename = str(key) + ".html"
             object_path = os.path.join(
                             reports_dir, filename.replace(' ', '_').lower())
             if not os.path.exists(os.path.dirname(object_path)):

From f0901eff2e4300ce736c0bb933b904bf5e887603 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 08:27:59 -0400
Subject: [PATCH 10/35] Update status.html table style

---
 looper/html_reports.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index bf5d94b3b..86ff30afc 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -276,7 +276,7 @@
         }
 """
 TABLE_STYLE_TEXT = \
-"""
+"""\
         .table td.text {
             max-width: 150px;
             <!-- top|right|bottom|left -->
@@ -495,11 +495,11 @@
 """
 STATUS_ROW_VALUE = \
 """\
-                  <td class='{row_class}' style="padding: 0px 4px 0px 4px; vertical-align: middle">{value}</td>
+                  <td class='{row_class}'>{value}</td>
 """
 STATUS_ROW_LINK = \
 """\
-                  <td class='{row_class}' style="cursor:pointer; padding: 0px 4px 0px 4px; vertical-align: middle" onclick="location.href='{file_link}'"><a href="{file_link}" target="_top">{link_name}</a></td>
+                  <td class='{row_class}' style="cursor:pointer" onclick="location.href='{file_link}'"><a href="{file_link}" target="_top">{link_name}</a></td>
 """
 STATUS_ROW_FOOTER = \
 """\
@@ -906,6 +906,9 @@ def create_status_html(all_samples):
                 os.makedirs(os.path.dirname(status_html_path))
             with open(status_html_path, 'w') as html_file:
                 html_file.write(HTML_HEAD_OPEN)
+                html_file.write("\t\t<style>\n")
+                html_file.write(TABLE_STYLE_TEXT)
+                html_file.write("\t\t</style>\n")
                 html_file.write(create_navbar(all_samples, reports_dir))
                 html_file.write(HTML_HEAD_CLOSE)
                 html_file.write(STATUS_HEADER)

From 305c709e1b82fed54c7bfea976f27321fd1321d6 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 08:41:23 -0400
Subject: [PATCH 11/35] Fix sample page table width

---
 looper/html_reports.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 86ff30afc..40ed482f6 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -280,7 +280,7 @@
         .table td.text {
             max-width: 150px;
             <!-- top|right|bottom|left -->
-            padding: 0px 4px 0px 4px;
+            padding: 0px 2px 0px 2px;
         }
         .table td.text span {
             white-space: nowrap;
@@ -424,9 +424,9 @@
 SAMPLE_TABLE_STYLE = \
 """\
         .table td.text {
-            max-width: 50%;
+            max-width: 500px;
             <!-- top|right|bottom|left -->
-            padding: 0px 0px 0px 0px;
+            padding: 0px 2px 0px 2px;
         }
         .table td.text span {
             white-space: nowrap;

From 3610dc74241bc98f34573319a20b4456ccfdc5b7 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 10:41:08 -0400
Subject: [PATCH 12/35] Fix profile.tsv file location determination

---
 looper_runtime_plot.R | 67 +++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 680b7b35e..97eb5ce13 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -1,7 +1,7 @@
 #! /usr/bin/env Rscript
 ###############################################################################
 #06/04/18
-#Last Updated 06/21/18
+#Last Updated 06/27/18
 #Original Author: Jason Smith
 #looper_runtime_plot.R
 #
@@ -18,10 +18,25 @@
 ####                              DEPENDENCIES                             ####
 ###############################################################################
 ##### LOAD ARGUMENTPARSER #####
-if(suppressPackageStartupMessages(!require(argparser))) {
-    install.packages("argparser")
+loadLibrary <- tryCatch (
+    {
+        suppressWarnings(suppressPackageStartupMessages(library(argparser)))
+    },
+    error=function(e) {
+        message("Error: Install the \"argparser\"",
+                " library before proceeding.")
+        return(NULL)
+    },
+    warning=function(e) {
+        message(e)
+        return(TRUE)
+    }
+)
+if (length(loadLibrary)!=0) {
+    suppressWarnings(library(argparser))
+} else {
+    quit()
 }
-suppressPackageStartupMessages(library(argparser, quietly=TRUE))
 
 # Create a parser
 p <- arg_parser("Produce an ATACseq pipeline (PEPATAC) runtime plot")
@@ -37,25 +52,29 @@ p <- add_argument(p, "config",
 argv <- parse_args(p)
 
 ##### LOAD ADDITIONAL DEPENDENCIES #####
-warnSetting <- getOption("warn")
-options(warn = -1)
-if(suppressPackageStartupMessages(!require(ggplot2))) {
-    install.packages("ggplot2")
-}
-if(suppressPackageStartupMessages(!require(grid))) {
-    install.packages("grid")
-}
-if(suppressPackageStartupMessages(!require(stringr))) {
-    install.packages("stringr")
-}
-if(suppressPackageStartupMessages(!require(pepr))) {
-    devtools::install_github("pepkit/pepr")
+required_libraries <- c("ggplot2", "grid", "stringr", "pepr")
+for (i in required_libraries) {
+    loadLibrary <- tryCatch (
+        {
+            suppressPackageStartupMessages(
+                suppressWarnings(library(i, character.only=TRUE)))
+        },
+        error=function(e) {
+            message("Error: Install the \"", i,
+                    "\" library before proceeding.")
+            return(NULL)
+        },
+        warning=function(e) {
+            message(e)
+            return(1)
+        }
+    )
+    if (length(loadLibrary)!=0) {
+        suppressWarnings(library(i, character.only=TRUE))
+    } else {
+        quit()
+    }
 }
-suppressPackageStartupMessages(library(ggplot2))
-suppressPackageStartupMessages(library(grid))
-suppressPackageStartupMessages(library(stringr))
-suppressPackageStartupMessages(library(pepr))
-options(warn = warnSetting)
 
 ###############################################################################
 ####                               FUNCTIONS                               ####
@@ -251,8 +270,8 @@ invisible(capture.output(outputDir <- config(prj)$metadata$output_dir))
 invisible(capture.output(numSamples <- length(samples(prj)$sample_name)))
 for (i in 1:numSamples) {
     invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
-    timeFile <- file.path(outputDir, "results_pipeline",
-                          sampleName, "ATACseq_profile.tsv")
+    timeFile <- Sys.glob(file.path(outputDir, "results_pipeline",
+                                   sampleName, "*_profile.tsv"))
     plotRuntime(timeFile, sampleName)
 }
 

From 11656d804e1e6f3c307012944fab9fe8de223dfe Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 11:12:29 -0400
Subject: [PATCH 13/35] Report path to HTML report upon completion

---
 looper/html_reports.py | 57 +++++++++++++++++++++---------------------
 looper/looper.py       |  6 +++--
 2 files changed, 33 insertions(+), 30 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 40ed482f6..6330305b2 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -1233,16 +1233,16 @@ def create_index_html(objs, stats):
             # Generate parent samples.html page path
             sample_parent_path = os.path.join(reports_dir, "samples.html")
 
-            objs_html_file = open(index_html_path, 'w')
-            objs_html_file.write(HTML_HEAD_OPEN)
-            objs_html_file.write("\t\t<style>\n")
-            objs_html_file.write(TABLE_STYLE_ROTATED_HEADER)
-            objs_html_file.write(TABLE_STYLE_TEXT)
-            objs_html_file.write("\t\t</style>\n")
-            objs_html_file.write(HTML_TITLE.format(project_name=self.prj.name))
+            index_html_file = open(index_html_path, 'w')
+            index_html_file.write(HTML_HEAD_OPEN)
+            index_html_file.write("\t\t<style>\n")
+            index_html_file.write(TABLE_STYLE_ROTATED_HEADER)
+            index_html_file.write(TABLE_STYLE_TEXT)
+            index_html_file.write("\t\t</style>\n")
+            index_html_file.write(HTML_TITLE.format(project_name=self.prj.name))
             navbar = create_navbar(objs, self.prj.metadata.output_dir)
-            objs_html_file.write(navbar)
-            objs_html_file.write(HTML_HEAD_CLOSE)
+            index_html_file.write(navbar)
+            index_html_file.write(HTML_HEAD_CLOSE)
 
             # Add stats_summary.tsv button link
             tsv_outfile_path = os.path.join(self.prj.metadata.output_dir,
@@ -1252,13 +1252,13 @@ def create_index_html(objs, stats):
             tsv_outfile_path += '_stats_summary.tsv'
             stats_relpath = os.path.relpath(tsv_outfile_path,
                                             self.prj.metadata.output_dir)
-            objs_html_file.write(HTML_BUTTON.format(
+            index_html_file.write(HTML_BUTTON.format(
                 file_path=stats_relpath, label="Stats Summary File"))
 
             # Add stats summary table to index page and produce individual
             # sample pages
             if os.path.isfile(tsv_outfile_path):
-                objs_html_file.write(TABLE_HEADER)
+                index_html_file.write(TABLE_HEADER)
                 # Produce table columns
                 sample_pos = 0
                 # Get unique column name list
@@ -1270,8 +1270,8 @@ def create_index_html(objs, stats):
                 unique_columns = uniqify(col_names)
                 # Write table column names to index.html file
                 for key in unique_columns:
-                    objs_html_file.write(TABLE_COLS.format(col_val=str(key)))
-                objs_html_file.write(TABLE_COLS_FOOTER)
+                    index_html_file.write(TABLE_COLS.format(col_val=str(key)))
+                index_html_file.write(TABLE_COLS_FOOTER)
 
                 # Produce table rows
                 sample_pos = 0
@@ -1289,7 +1289,7 @@ def create_index_html(objs, stats):
                     # Reset column position counter
                     col_pos = 0
                     sample_name = str(stats[sample_pos]['sample_name'])
-                    objs_html_file.write(TABLE_ROW_HEADER)
+                    index_html_file.write(TABLE_ROW_HEADER)
                     for value in table_row:                 
                         if value == sample_name:
                             # Generate individual sample page and return link
@@ -1297,17 +1297,17 @@ def create_index_html(objs, stats):
                                                              sample_name,
                                                              stats[sample_pos])
                             # Treat sample_name as a link to sample page
-                            objs_html_file.write(TABLE_ROWS_LINK.format(
+                            index_html_file.write(TABLE_ROWS_LINK.format(
                                 html_page=sample_page,
                                 page_name=sample_page,
                                 link_name=sample_name))
                         # If not the sample name, add as an unlinked cell value
                         else:
-                            objs_html_file.write(TABLE_ROWS.format(
+                            index_html_file.write(TABLE_ROWS.format(
                                 row_val=str(value)))
-                    objs_html_file.write(TABLE_ROW_FOOTER)
+                    index_html_file.write(TABLE_ROW_FOOTER)
                     sample_pos += 1
-                objs_html_file.write(TABLE_FOOTER)
+                index_html_file.write(TABLE_FOOTER)
             else:
                 _LOGGER.warn("No stats file '%s'", stats_file)
 
@@ -1327,20 +1327,21 @@ def create_index_html(objs, stats):
 
             # Add project level objects
             prj_objs = create_project_objects()
-            objs_html_file.write("\t\t<hr>\n")
-            objs_html_file.write(prj_objs)
-            objs_html_file.write("\t\t<hr>\n")
+            index_html_file.write("\t\t<hr>\n")
+            index_html_file.write(prj_objs)
+            index_html_file.write("\t\t<hr>\n")
 
             # Complete and close HTML file
-            objs_html_file.write(HTML_FOOTER)
-            objs_html_file.close()
-
-            _LOGGER.info(
-                "Summary (n=" + str(len(stats)) + "): " + tsv_outfile_path)
+            index_html_file.write(HTML_FOOTER)
+            index_html_file.close()
+            
+            # Return the path to the completed index.html file
+            return index_html_path
 
-        _LOGGER.info("create_index_html")
         # Generate HTML report
-        create_index_html(objs, stats)
+        index_html_path = create_index_html(objs, stats)
+        return index_html_path
+
 
 def uniqify(seq):
     """ Fast way to uniqify while preserving input order. """
diff --git a/looper/looper.py b/looper/looper.py
index 7b5223826..d9c60a2fc 100755
--- a/looper/looper.py
+++ b/looper/looper.py
@@ -660,7 +660,7 @@ def __call__(self):
                 _LOGGER.debug(iface)
                 pl = iface.fetch_pipelines(protocol)
                 summarizers = iface.get_attribute(pl, "summarizers")
-                for summarizer in summarizers:
+                for summarizer in set(summarizers):
                     summarizer_abspath = os.path.join(
                         os.path.dirname(iface.pipe_iface_file), summarizer)
                     _LOGGER.debug([summarizer_abspath, self.prj.config_file])
@@ -671,7 +671,9 @@ def __call__(self):
 
         # Produce HTML report
         report_builder = HTMLReportBuilder(self.prj)
-        report_builder(objs, stats)
+        report_path = report_builder(objs, stats)
+        _LOGGER.info(
+                "HTML Report (n=" + str(len(stats)) + "): " + report_path)
 
 def aggregate_exec_skip_reasons(skip_reasons_sample_pairs):
     """

From d4e2ef1027f821e1afef0912cf8ce4f16361103e Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Wed, 27 Jun 2018 14:52:06 -0400
Subject: [PATCH 14/35] Produce an average runtime file

---
 looper_runtime_plot.R | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 97eb5ce13..bc90bbe64 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -12,7 +12,7 @@
 #usage: Rscript /path/to/Rscript/looper_runtime_plot.R 
 #       /path/to/project_config.yaml
 #
-#requirements: argparser, ggplot2, grid, stringr, pepr
+#requirements: argparser, dplyr, ggplot2, grid, stringr, pepr
 #
 ###############################################################################
 ####                              DEPENDENCIES                             ####
@@ -52,7 +52,7 @@ p <- add_argument(p, "config",
 argv <- parse_args(p)
 
 ##### LOAD ADDITIONAL DEPENDENCIES #####
-required_libraries <- c("ggplot2", "grid", "stringr", "pepr")
+required_libraries <- c("dplyr", "ggplot2", "grid", "stringr", "pepr")
 for (i in required_libraries) {
     loadLibrary <- tryCatch (
         {
@@ -192,7 +192,7 @@ dedupSequential = function(dupDF) {
 }
 
 # Produce a runtime plot for a sample
-plotRuntime = function(timeFile, sampleName) {
+getRuntime = function(timeFile, sampleName) {
     # Get just the first line to get pipeline start time
     startTime  <- readLines(timeFile, n=1)
 
@@ -229,7 +229,7 @@ plotRuntime = function(timeFile, sampleName) {
     combinedTime$cmd   <- as.character(combinedTime$cmd)
     # Set order for plotting purposes
     combinedTime$order <- as.factor(as.numeric(row.names(combinedTime)))
-
+    
     # Create plot
     p <- ggplot(data=combinedTime, aes(x=order, y=time)) +
                 geom_bar(stat="identity", position=position_dodge())+
@@ -253,6 +253,8 @@ plotRuntime = function(timeFile, sampleName) {
         file=buildFilePath(sampleName, "_Runtime.png", prj), 
         width=unit(8,"inches"), 
         height=unit(5.5,"inches"))
+    
+    return(combinedTime)
 }
 
 ###############################################################################
@@ -266,13 +268,37 @@ prj = Project(configFile)
 ####                                 MAIN                                  ####
 ###############################################################################
 # For each sample in the project, produce a runtime summary plot
+if (!is.null(config(prj)$name)) {
+    accumName <- file.path(config(prj)$metadata$output_dir,
+                           paste(config(prj)$name, "average_runtime.csv",
+                                 sep="_"))
+} else {
+    accumName <- file.path(config(prj)$metadata$output_dir,
+                           "average_runtime.csv")
+}
 invisible(capture.output(outputDir <- config(prj)$metadata$output_dir))
 invisible(capture.output(numSamples <- length(samples(prj)$sample_name)))
+accumulated <- data.frame(cmd=as.character(), time=as.numeric(), order=as.numeric())
 for (i in 1:numSamples) {
     invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
     timeFile <- Sys.glob(file.path(outputDir, "results_pipeline",
                                    sampleName, "*_profile.tsv"))
-    plotRuntime(timeFile, sampleName)
+    combinedTime <- getRuntime(timeFile, sampleName)
+    if (i == 1) {
+        accumulated <- combinedTime
+    } else {
+        accumulated <- full_join(accumulated, combinedTime, by=c("cmd","order"))
+    }
+}
+accumulated <- accumulated[,-c(2,3)]
+final <- data.frame(cmd=as.character(), average_time=as.numeric())
+for (i in 1:nrow(accumulated)) {
+    cmd <- accumulated$cmd[i]
+    tmp <- accumulated[,-1]
+    average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/(ncol(tmp)-1)
+    average = data.frame(cbind(cmd, average_time))
+    final <- rbind(final, average)
 }
+write.csv(final, accumName, row.names=FALSE)
 
 write("Completed!\n", stdout())
\ No newline at end of file

From 46d6983c5cb78b650e1ec52d9d862c6ba4dbc72f Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Wed, 27 Jun 2018 15:50:05 -0400
Subject: [PATCH 15/35] increase logo size

---
 looper/html_reports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 6330305b2..ca97ade34 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -83,7 +83,7 @@
 """\
         <div id="top"></div>
         <nav class="navbar sticky-top navbar-expand-lg navbar-dark bg-primary">
-          <a class="navbar-left" href="#top"><img src="{logo}" width="30" height="30" class="d-inline-block align-middle img-responsive" alt="LOOPER"></a>
+          <a class="navbar-left" href="#top"><img src="{logo}" class="d-inline-block align-middle img-responsive" alt="LOOPER" style="max-height:60px; margin-top:-10px; margin-bottom:-10px"></a>
           <button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarSupportedContent" aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation">
             <span class="navbar-toggler-icon"></span>
           </button>

From fa04fd12cb191442301eb11df2c66c532c495b29 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Thu, 28 Jun 2018 08:48:09 -0400
Subject: [PATCH 16/35] Fix Summary page stats table ordering

---
 looper/html_reports.py | 27 ++++++---------------------
 looper/looper.py       | 14 ++++----------
 2 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index ca97ade34..5b5b57410 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -569,7 +569,7 @@ def __init__(self, prj):
         super(HTMLReportBuilder, self).__init__()
         self.prj = prj
 
-    def __call__(self, objs, stats):
+    def __call__(self, objs, stats, columns):
         """ Do the work of the subcommand/program. """
 
         def create_object_parent_html(all_objects):
@@ -924,7 +924,6 @@ def create_status_html(all_samples):
                     sample_dir = os.path.join(
                             self.prj.metadata.results_subdir, sample_name)
                     # Confirm sample directory exists, then build page
-                    _LOGGER.info("sample_dir: " + str(sample_dir))
                     if os.path.exists(sample_dir):                        
                         # Grab the status flag for the current sample
                         flag = glob.glob(os.path.join(sample_dir, '*.flag'))               
@@ -1212,7 +1211,7 @@ def create_project_objects():
                                    "\n".join(obj_links),
                                    OBJECTS_LIST_FOOTER]))
 
-        def create_index_html(objs, stats):
+        def create_index_html(objs, stats, col_names):
             """
             Generate an index.html style project home page w/ sample summary
             statistics
@@ -1224,14 +1223,9 @@ def create_index_html(objs, stats):
             """
 
             objs.drop_duplicates(keep='last', inplace=True)
-            reports_dir = os.path.join(self.prj.metadata.output_dir, "reports")
             # Generate parent index.html page path
             index_html_path = "{root}_summary.html".format(
                 root=os.path.join(self.prj.metadata.output_dir, self.prj.name))
-            # Generate parent objects.html page path
-            object_parent_path = os.path.join(reports_dir, "objects.html")
-            # Generate parent samples.html page path
-            sample_parent_path = os.path.join(reports_dir, "samples.html")
 
             index_html_file = open(index_html_path, 'w')
             index_html_file.write(HTML_HEAD_OPEN)
@@ -1260,28 +1254,19 @@ def create_index_html(objs, stats):
             if os.path.isfile(tsv_outfile_path):
                 index_html_file.write(TABLE_HEADER)
                 # Produce table columns
-                sample_pos = 0
-                # Get unique column name list
-                col_names = []
-                while sample_pos < len(stats):
-                    for key, value in stats[sample_pos].items():
-                        col_names.append(key)
-                    sample_pos += 1
-                unique_columns = uniqify(col_names)
-                # Write table column names to index.html file
-                for key in unique_columns:
+                for key in col_names:
                     index_html_file.write(TABLE_COLS.format(col_val=str(key)))
                 index_html_file.write(TABLE_COLS_FOOTER)
 
                 # Produce table rows
                 sample_pos = 0
                 col_pos = 0
-                num_columns = len(unique_columns)
+                num_columns = len(col_names)
                 for row in stats:
                     # Match row value to column
                     table_row = []
                     while col_pos < num_columns:
-                        value = row.get(unique_columns[col_pos])
+                        value = row.get(col_names[col_pos])
                         if value is None:
                             value = ''
                         table_row.append(value)
@@ -1339,7 +1324,7 @@ def create_index_html(objs, stats):
             return index_html_path
 
         # Generate HTML report
-        index_html_path = create_index_html(objs, stats)
+        index_html_path = create_index_html(objs, stats, columns)
         return index_html_path
 
 
diff --git a/looper/looper.py b/looper/looper.py
index d9c60a2fc..44cc965c5 100755
--- a/looper/looper.py
+++ b/looper/looper.py
@@ -600,19 +600,17 @@ def __call__(self):
 
             t = _pd.read_table(
                 stats_file, header=None, names=['key', 'value', 'pl'])
-
             t.drop_duplicates(subset=['key', 'pl'], keep='last', inplace=True)
             # t.duplicated(subset= ['key'], keep = False)
             t.loc[:, 'plkey'] = t['pl'] + ":" + t['key']
             dupes = t.duplicated(subset=['key'], keep=False)
             t.loc[dupes, 'key'] = t.loc[dupes, 'plkey']
-
             sample_stats.update(t.set_index('key')['value'].to_dict())
             stats.append(sample_stats)
             columns.extend(t.key.tolist())
 
-        self.counter.reset() 
-        
+        self.counter.reset()
+
         # Create objects summary file
         for sample in self.prj.samples:
             # Process any reported objects
@@ -634,16 +632,12 @@ def __call__(self):
         if hasattr(self.prj, "subproject") and self.prj.subproject:
             tsv_outfile_path += '_' + self.prj.subproject
         tsv_outfile_path += '_stats_summary.tsv'
-
         tsv_outfile = open(tsv_outfile_path, 'w')
-
         tsv_writer = csv.DictWriter(tsv_outfile, fieldnames=uniqify(columns),
-                                    delimiter='\t', extrasaction='ignore')
+                                    delimiter='\t', extrasaction='ignore')     
         tsv_writer.writeheader()
-
         for row in stats:
             tsv_writer.writerow(row)
-
         tsv_outfile.close()
 
         _LOGGER.info(
@@ -671,7 +665,7 @@ def __call__(self):
 
         # Produce HTML report
         report_builder = HTMLReportBuilder(self.prj)
-        report_path = report_builder(objs, stats)
+        report_path = report_builder(objs, stats, uniqify(columns))
         _LOGGER.info(
                 "HTML Report (n=" + str(len(stats)) + "): " + report_path)
 

From f51086217af024f483c06a8d3a587d14737899e2 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Thu, 28 Jun 2018 09:30:07 -0400
Subject: [PATCH 17/35] Fix disordered sample page stats table; see #47

---
 looper/html_reports.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 5b5b57410..a10b9aa5a 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -6,6 +6,7 @@
 import logging
 
 from peppy.utils import alpha_cased
+from collections import OrderedDict
 
 _LOGGER = logging.getLogger('HTMLReportBuilder')
 
@@ -1264,6 +1265,7 @@ def create_index_html(objs, stats, col_names):
                 num_columns = len(col_names)
                 for row in stats:
                     # Match row value to column
+                    # Row is disordered and does not handle empty cells
                     table_row = []
                     while col_pos < num_columns:
                         value = row.get(col_names[col_pos])
@@ -1275,12 +1277,14 @@ def create_index_html(objs, stats, col_names):
                     col_pos = 0
                     sample_name = str(stats[sample_pos]['sample_name'])
                     index_html_file.write(TABLE_ROW_HEADER)
-                    for value in table_row:                 
+                    # Order table_row by col_names
+                    sample_stats = OrderedDict(zip(col_names, table_row))
+                    for value in table_row:
                         if value == sample_name:
                             # Generate individual sample page and return link
                             sample_page = create_sample_html(objs,
                                                              sample_name,
-                                                             stats[sample_pos])
+                                                             sample_stats)
                             # Treat sample_name as a link to sample page
                             index_html_file.write(TABLE_ROWS_LINK.format(
                                 html_page=sample_page,

From 9542044ed4c5067e0d920e7567ed7d63645d8521 Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Thu, 28 Jun 2018 10:49:44 -0400
Subject: [PATCH 18/35] add space

---
 looper/looper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/looper/looper.py b/looper/looper.py
index 44cc965c5..f6d35b8b8 100755
--- a/looper/looper.py
+++ b/looper/looper.py
@@ -669,6 +669,7 @@ def __call__(self):
         _LOGGER.info(
                 "HTML Report (n=" + str(len(stats)) + "): " + report_path)
 
+
 def aggregate_exec_skip_reasons(skip_reasons_sample_pairs):
     """
     Collect the reasons for skipping submission/execution of each sample

From 1f2ec5c8be4a852999e527174796de0cdfc25a0e Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Thu, 28 Jun 2018 11:01:39 -0400
Subject: [PATCH 19/35] update usage docs version

---
 doc/source/usage.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/doc/source/usage.rst b/doc/source/usage.rst
index a8e58c4f9..fefdc19fc 100644
--- a/doc/source/usage.rst
+++ b/doc/source/usage.rst
@@ -22,7 +22,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper [-h] [-V] [--logfile LOGFILE] [--verbosity {0,1,2,3,4}] [--dbg]
 	              {run,summarize,destroy,check,clean} ...
 	
@@ -53,7 +53,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper run [-h] [-t TIME_DELAY] [--ignore-flags]
 	                  [--allow-duplicate-names] [--compute COMPUTE] [--env ENV]
 	                  [--limit LIMIT] [--lump LUMP] [--lumpn LUMPN]
@@ -106,7 +106,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper summarize [-h] [--file-checks] [-d]
 	                        [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                        | --include-protocols
@@ -137,7 +137,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper destroy [-h] [--file-checks] [-d]
 	                      [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                      | --include-protocols
@@ -168,7 +168,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper check [-h] [-A] [-F [FLAGS [FLAGS ...]]] [--file-checks] [-d]
 	                    [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                    | --include-protocols
@@ -204,7 +204,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0-dev
+	version: 0.9.0
 	usage: looper clean [-h] [--file-checks] [-d]
 	                    [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                    | --include-protocols

From 165d10f2fce3d6fcd80b5bbbcb80492b4da58deb Mon Sep 17 00:00:00 2001
From: Vince Reuter <vince.reuter@gmail.com>
Date: Thu, 28 Jun 2018 12:52:51 -0400
Subject: [PATCH 20/35] skip null location entries

---
 looper/__init__.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/looper/__init__.py b/looper/__init__.py
index 0d5c484f1..76b65d7a4 100644
--- a/looper/__init__.py
+++ b/looper/__init__.py
@@ -94,7 +94,10 @@ def setup_looper_logger(level, additional_locations=None, devmode=False):
 
     # Add the handlers.
     formatter = logging.Formatter(fmt=(fmt or DEFAULT_LOGGING_FMT))
+
     for loc in where:
+        if not loc:
+            continue
         if isinstance(loc, str):
             # File destination
             dirpath = os.path.abspath(os.path.dirname(loc))

From eda743d529f23bfaf4f6fed9b5abfbf8fcd91b5e Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Fri, 29 Jun 2018 08:20:45 -0400
Subject: [PATCH 21/35] version bump for release

---
 doc/source/changelog.rst |  7 +++++++
 doc/source/usage.rst     | 12 ++++++------
 looper/_version.py       |  2 +-
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
index e9e2cdbdb..34e2a4a3e 100644
--- a/doc/source/changelog.rst
+++ b/doc/source/changelog.rst
@@ -1,5 +1,12 @@
 Changelog
 ******************************
+- **v0.9.1** (*2018-06-30*):
+
+  - Fixed
+
+    - Fixed several bugs with ``looper summarize`` that caused failure on edge cases.
+
+
 - **v0.9.0** (*2018-06-25*):
 
   - New
diff --git a/doc/source/usage.rst b/doc/source/usage.rst
index fefdc19fc..e728f681c 100644
--- a/doc/source/usage.rst
+++ b/doc/source/usage.rst
@@ -22,7 +22,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper [-h] [-V] [--logfile LOGFILE] [--verbosity {0,1,2,3,4}] [--dbg]
 	              {run,summarize,destroy,check,clean} ...
 	
@@ -53,7 +53,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper run [-h] [-t TIME_DELAY] [--ignore-flags]
 	                  [--allow-duplicate-names] [--compute COMPUTE] [--env ENV]
 	                  [--limit LIMIT] [--lump LUMP] [--lumpn LUMPN]
@@ -106,7 +106,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper summarize [-h] [--file-checks] [-d]
 	                        [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                        | --include-protocols
@@ -137,7 +137,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper destroy [-h] [--file-checks] [-d]
 	                      [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                      | --include-protocols
@@ -168,7 +168,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper check [-h] [-A] [-F [FLAGS [FLAGS ...]]] [--file-checks] [-d]
 	                    [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                    | --include-protocols
@@ -204,7 +204,7 @@ Here you can see the command-line usage instructions for the main looper command
 
 .. code-block:: none
 
-	version: 0.9.0
+	version: 0.9.1
 	usage: looper clean [-h] [--file-checks] [-d]
 	                    [--exclude-protocols [EXCLUDE_PROTOCOLS [EXCLUDE_PROTOCOLS ...]]
 	                    | --include-protocols
diff --git a/looper/_version.py b/looper/_version.py
index 3e2f46a3a..d69d16e98 100644
--- a/looper/_version.py
+++ b/looper/_version.py
@@ -1 +1 @@
-__version__ = "0.9.0"
+__version__ = "0.9.1"

From c9e739086d0da53d1eb167f91420b76b418c21ac Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Fri, 29 Jun 2018 08:32:52 -0400
Subject: [PATCH 22/35] relax requirement versions in general

---
 requirements/requirements-all.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt
index 7a47966e1..b96530751 100644
--- a/requirements/requirements-all.txt
+++ b/requirements/requirements-all.txt
@@ -1,4 +1,4 @@
-colorama==0.3.9
+colorama>=0.3.9
 pandas>=0.20.2
-pyyaml==3.12
-peppy>=0.17.2
+pyyaml>=3.12
+peppy>=0.18.1

From dfb22a0d88db6b4290cce0fd3f837796485564ec Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 29 Jun 2018 10:59:04 -0400
Subject: [PATCH 23/35] Fix single sample error

---
 looper_runtime_plot.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index bc90bbe64..d41bc966e 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -290,15 +290,17 @@ for (i in 1:numSamples) {
         accumulated <- full_join(accumulated, combinedTime, by=c("cmd","order"))
     }
 }
-accumulated <- accumulated[,-c(2,3)]
+accumulated <- subset(accumulated, select=-c(order))
 final <- data.frame(cmd=as.character(), average_time=as.numeric())
 for (i in 1:nrow(accumulated)) {
     cmd <- accumulated$cmd[i]
     tmp <- accumulated[,-1]
-    average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/(ncol(tmp)-1)
+    tmp <- subset(accumulated, select=-c(cmd))
+    average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/numSamples
     average = data.frame(cbind(cmd, average_time))
     final <- rbind(final, average)
 }
+
 write.csv(final, accumName, row.names=FALSE)
 
 write("Completed!\n", stdout())
\ No newline at end of file

From 102ef62a6a7a35ec9239f1484df0d17350ee2c07 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 29 Jun 2018 11:02:11 -0400
Subject: [PATCH 24/35] Remove redundant tmp assignment

---
 looper_runtime_plot.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index d41bc966e..fdc27a5d2 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -294,7 +294,6 @@ accumulated <- subset(accumulated, select=-c(order))
 final <- data.frame(cmd=as.character(), average_time=as.numeric())
 for (i in 1:nrow(accumulated)) {
     cmd <- accumulated$cmd[i]
-    tmp <- accumulated[,-1]
     tmp <- subset(accumulated, select=-c(cmd))
     average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/numSamples
     average = data.frame(cbind(cmd, average_time))

From e97f07e26cdd2d3d6732f1e8e9965959cc1ce24c Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 29 Jun 2018 12:35:34 -0400
Subject: [PATCH 25/35] Change missing library messaging; handle still running
 samples and/or missing samples

---
 looper_runtime_plot.R | 90 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 22 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index fdc27a5d2..2ec32b5a6 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -61,7 +61,7 @@ for (i in required_libraries) {
         },
         error=function(e) {
             message("Error: Install the \"", i,
-                    "\" library before proceeding.")
+                    "\" R library before proceeding.")
             return(NULL)
         },
         warning=function(e) {
@@ -194,16 +194,44 @@ dedupSequential = function(dupDF) {
 # Produce a runtime plot for a sample
 getRuntime = function(timeFile, sampleName) {
     # Get just the first line to get pipeline start time
-    startTime  <- readLines(timeFile, n=1)
+    if (length(timeFile) == 0 || !file.exists(timeFile)) {
+        fileMissing <<- TRUE
+        return(data.frame(cmd=as.character(),
+                          time=as.numeric(),
+                          order=as.numeric()))
+    } else {
+        fileMissing <<- FALSE
+        startTime  <- readLines(timeFile, n=1)
+    }    
 
     # Extract just the starting time timestamp
     startTime  <- word(startTime, -1, sep=" ")
 
     # Get the run times for each pipeline command
     # Ignore any lines containing '#'
-    timeStamps <- read.delim2(timeFile, skip=2, header = FALSE,
-                              as.is=TRUE, comment.char = '#')
-
+    # TODO: Handle an empty file for a still running or failed sample
+    timeStamps <- tryCatch(
+        {
+            read.delim2(timeFile, skip=2, header = FALSE,
+                        as.is=TRUE, comment.char = '#')
+        },
+        error=function(e) {
+            message("The profile.tsv file for ", sampleName, " contains no ",
+                    "commands.  Check if ", sampleName, " has yet to be run.")
+            timeStamps <- data.frame(cmd=as.character(),
+                                     time=as.numeric(),
+                                     order=as.numeric())
+            return(timeStamps)
+        },
+        warning=function(e) {
+            message("The profile.tsv file for ", sampleName, " is incomplete.")
+            message("WARNING: ", e)
+        }
+    )
+    if (nrow(timeStamps) == 0 ) {
+        # The profile.tsv contains no commands
+        return(timeStamps)
+    }
     # Remove leading directory structure
     for (i in 1:nrow(timeStamps)) {
         timeStamps[i,1]  <- sub('.*\\/', '', timeStamps[i,1])   
@@ -276,30 +304,48 @@ if (!is.null(config(prj)$name)) {
     accumName <- file.path(config(prj)$metadata$output_dir,
                            "average_runtime.csv")
 }
-invisible(capture.output(outputDir <- config(prj)$metadata$output_dir))
+invisible(capture.output(outputDir  <- config(prj)$metadata$output_dir))
 invisible(capture.output(numSamples <- length(samples(prj)$sample_name)))
-accumulated <- data.frame(cmd=as.character(), time=as.numeric(), order=as.numeric())
+accumulated <- data.frame(cmd=as.character(), time=as.numeric(),
+                          order=as.numeric())
 for (i in 1:numSamples) {
     invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
-    timeFile <- Sys.glob(file.path(outputDir, "results_pipeline",
-                                   sampleName, "*_profile.tsv"))
-    combinedTime <- getRuntime(timeFile, sampleName)
+    timeFile        <- Sys.glob(file.path(outputDir, "results_pipeline",
+                                          sampleName, "*_profile.tsv"))
+    combinedTime    <- getRuntime(timeFile, sampleName)
     if (i == 1) {
         accumulated <- combinedTime
     } else {
-        accumulated <- full_join(accumulated, combinedTime, by=c("cmd","order"))
+        accumulated <- full_join(subset(accumulated, select=-c(order)),
+                                 subset(combinedTime, select=-c(order)),
+                                 by=c("cmd"))
     }
 }
-accumulated <- subset(accumulated, select=-c(order))
-final <- data.frame(cmd=as.character(), average_time=as.numeric())
-for (i in 1:nrow(accumulated)) {
-    cmd <- accumulated$cmd[i]
-    tmp <- subset(accumulated, select=-c(cmd))
-    average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/numSamples
-    average = data.frame(cbind(cmd, average_time))
-    final <- rbind(final, average)
+#accumulated <- subset(accumulated, select=-c(order))
+final       <- data.frame(cmd=as.character(), average_time=as.numeric())
+if (nrow(accumulated) == 0) {
+    # Do nothing
+    final <- NULL
+} else {
+    for (i in 1:nrow(accumulated)) {
+        cmd          <- accumulated$cmd[i]
+        tmp          <- subset(accumulated, select=-c(cmd))
+        average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/numSamples
+        average      <- data.frame(cbind(cmd, average_time))
+        final        <- rbind(final, average)
+    }
 }
 
-write.csv(final, accumName, row.names=FALSE)
-
-write("Completed!\n", stdout())
\ No newline at end of file
+if (is.null(final)) {
+    if (fileMissing) {
+        write("WARNING: Profile.tsv file(s) was/were missing.",
+              stdout())
+    } else {
+        write("WARNING: Profile.tsv file(s) contained no commands.",
+              stdout())
+    }
+} else {
+    write.csv(final, accumName, row.names=FALSE)
+    write(paste("Average command runtime (n=", numSamples, "): ",
+                accumName, sep=""), stdout())
+}

From 9686599710c512b0e0bca1257a898b132a170d93 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 29 Jun 2018 12:46:19 -0400
Subject: [PATCH 26/35] Handle trailing matching commands in profile.tsv

---
 looper_runtime_plot.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 2ec32b5a6..1eb54a969 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -175,12 +175,13 @@ dedupSequential = function(dupDF) {
     while (counter <= nrow(dupDF)) {
         currentCmd <- dupDF[counter, 1]
         total      <- dupDF[counter, 2]
-        if (counter + 1 < nrow(dupDF)) {
+        if (counter + 1 <= nrow(dupDF)) {
             nextCmd     <- dupDF[counter + 1, 1]
             while (nextCmd == currentCmd) {
                 counter <- counter + 1
                 total   <- total + dupDF[counter, 2]
                 nextCmd <- dupDF[counter + 1, 1]
+                if (is.na(nextCmd)) {break}
             }
         }
         dedupDF[currentPos, 1] <- currentCmd

From 2b4296fcb8f254ae98c9228b5698d0f5ddc7c74a Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 29 Jun 2018 14:52:39 -0400
Subject: [PATCH 27/35] Fix error with never-ending growth of cmd list

---
 looper_runtime_plot.R | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 1eb54a969..eb082c3cc 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -313,16 +313,21 @@ for (i in 1:numSamples) {
     invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
     timeFile        <- Sys.glob(file.path(outputDir, "results_pipeline",
                                           sampleName, "*_profile.tsv"))
+    write(paste("Plotting runtime: ", sampleName, sep=""), stdout())
     combinedTime    <- getRuntime(timeFile, sampleName)
     if (i == 1) {
         accumulated <- combinedTime
     } else {
-        accumulated <- full_join(subset(accumulated, select=-c(order)),
-                                 subset(combinedTime, select=-c(order)),
-                                 by=c("cmd"))
+        # accumulated <- full_join(subset(accumulated, select=-c(order)),
+                                 # subset(combinedTime, select=-c(order)),
+                                 # by=c("cmd"))
+        #accumulated <- full_join(accumulated, combinedTime, by=c("cmd"))
+        accumulated <- suppressWarnings(merge(accumulated, combinedTime, by=c("cmd", "order"), all=TRUE))
     }
 }
-#accumulated <- subset(accumulated, select=-c(order))
+
+accumulated <- accumulated[order(accumulated$order), ]
+accumulated <- subset(accumulated, select=-c(order))
 final       <- data.frame(cmd=as.character(), average_time=as.numeric())
 if (nrow(accumulated) == 0) {
     # Do nothing

From d2357eda54f7bbc60b82d028d6e024fb573a5074 Mon Sep 17 00:00:00 2001
From: Vince <vreuter@users.noreply.github.com>
Date: Mon, 2 Jul 2018 11:40:06 -0400
Subject: [PATCH 28/35] Remove outdated dependency link

From when peppy wasn't yet on PyPI
Close https://github.com/pepkit/looper/issues/51
---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 11d99fb2c..a987a1693 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,6 @@ def get_static(name, condition=None):
             'looper = looper.looper:main'
         ],
     },
-    dependency_links=["git+git://github.com/vreuter/pep.git@support-looper#egg=pep"],
     scripts=scripts,
     package_data={'looper': ['submit_templates/*']},
     include_package_data=True,

From afc28564ed4a26f3cc19ab842572e73d2bf866a2 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 3 Jul 2018 09:02:40 -0400
Subject: [PATCH 29/35] Handle missing and incomplete profile.tsv files
 properly

---
 looper_runtime_plot.R | 171 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 138 insertions(+), 33 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index eb082c3cc..9f9f7723f 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -193,7 +193,7 @@ dedupSequential = function(dupDF) {
 }
 
 # Produce a runtime plot for a sample
-getRuntime = function(timeFile, sampleName) {
+getRuntime = function(timeFile, sampleName, createPlot=TRUE) {
     # Get just the first line to get pipeline start time
     if (length(timeFile) == 0 || !file.exists(timeFile)) {
         fileMissing <<- TRUE
@@ -260,32 +260,135 @@ getRuntime = function(timeFile, sampleName) {
     combinedTime$order <- as.factor(as.numeric(row.names(combinedTime)))
     
     # Create plot
-    p <- ggplot(data=combinedTime, aes(x=order, y=time)) +
-                geom_bar(stat="identity", position=position_dodge())+
-                scale_fill_brewer(palette="Paired")+
-                theme_minimal() +
-                coord_flip() +
-                labs(y = paste("Time (s)\n", "[Start: ", startTime, " | ", 
-                               "End: ", finishTime, "]", sep=""),
-                     x = "PEPATAC Command") +
-                scale_x_discrete(labels=combinedTime$cmd) +
-                theme(plot.title = element_text(hjust = 0.5))
-    
-    # Produce both PDF and PNG
-    set_panel_size(
-        p, 
-        file=buildFilePath(sampleName, "_Runtime.pdf", prj), 
-        width=unit(8,"inches"), 
-        height=unit(5.5,"inches"))
-    set_panel_size(
-        p, 
-        file=buildFilePath(sampleName, "_Runtime.png", prj), 
-        width=unit(8,"inches"), 
-        height=unit(5.5,"inches"))
+    if (createPlot) {
+        p <- ggplot(data=combinedTime, aes(x=order, y=time)) +
+                    geom_bar(stat="identity", position=position_dodge())+
+                    scale_fill_brewer(palette="Paired")+
+                    theme_minimal() +
+                    coord_flip() +
+                    labs(y = paste("Time (s)\n", "[Start: ", startTime, " | ", 
+                                   "End: ", finishTime, "]", sep=""),
+                         x = "PEPATAC Command") +
+                    scale_x_discrete(labels=combinedTime$cmd) +
+                    theme(plot.title = element_text(hjust = 0.5))
+        
+        # Produce both PDF and PNG
+        set_panel_size(
+            p, 
+            file=buildFilePath(sampleName, "_Runtime.pdf", prj), 
+            width=unit(8,"inches"), 
+            height=unit(5.5,"inches"))
+        set_panel_size(
+            p, 
+            file=buildFilePath(sampleName, "_Runtime.png", prj), 
+            width=unit(8,"inches"), 
+            height=unit(5.5,"inches"))
+    }
     
     return(combinedTime)
 }
 
+joinTimes = function (new, preexist) {
+    combined <- sort(union(levels(preexist$order), levels(new$order)))
+    if (length(new$cmd) == length(preexist$cmd) &&
+        all(is.element(new$cmd, preexist$cmd))) {
+        #message("A")
+        # Both data.frames have the same commands in number and value
+        preexist <- full_join(preexist, new, by=c("cmd", "order"))
+        return (preexist)
+    } else if (length(new$cmd) < length(preexist$cmd)) {
+        #message("B")
+        # The to-be-added data.frame contains less total commands, but they
+        # are all present in the pre-existing data.frame
+        rebuiltNew  <- data.frame(cmd=preexist$cmd,
+                                  time=rep(0, nrow(preexist)),
+                                  order=rep(1:nrow(preexist)),
+                                  stringsAsFactors=FALSE)
+        uniqueCmds  <- data.frame(cmd=as.character(), time=as.numeric(),
+                                  order=as.numeric(), stringsAsFactors=FALSE)
+        for (i in 1:nrow(new)) {
+             if (new$cmd[i] %in% preexist$cmd) {
+                rowPos <- grep(new$cmd[i], preexist$cmd)
+                rebuiltNew[rowPos, ] <- data.frame(cmd=new$cmd[i],
+                                                   time=new$time[i],
+                                                   order=rowPos,
+                                                   stringsAsFactors=FALSE)
+             } else {
+                uniqueCmds <- rbind(uniqueCmds, new[i, ])
+             }
+        }
+        uniqueCmds$order <- as.factor(uniqueCmds$order)
+        joinedTimes <- left_join(
+                        mutate(preexist, order=factor(order, levels=combined)),
+                        mutate(rebuiltNew,
+                               order=factor(order, levels=combined)),
+                        by=c("cmd","order"))
+        joinedTimes$order <- as.factor(rep(1:nrow(joinedTimes)))
+        return(joinedTimes)
+    } else if (length(new$cmd) > length(preexist$cmd)) {
+        #message("C")
+        # The to-be-added data.frame contains more total commands but they
+        # include all the pre-existing commands
+        rebuiltPre  <- data.frame(cmd=new$cmd,
+                                  time=rep(0, nrow(new)),
+                                  order=rep(1:nrow(new)),
+                                  stringsAsFactors=FALSE)
+        uniqueCmds  <- data.frame(cmd=as.character(), time=as.numeric(),
+                                  order=as.numeric(), stringsAsFactors=FALSE)
+        for (i in 1:nrow(preexist)) {
+             if (preexist$cmd[i] %in% new$cmd) {
+                rowPos <- grep(preexist$cmd[i], new$cmd)
+                rebuiltPre[rowPos, ] <- data.frame(cmd=preexist$cmd[i],
+                                                   time=preexist$time[i],
+                                                   order=rowPos,
+                                                   stringsAsFactors=FALSE)
+             } else {
+                uniqueCmds <- rbind(uniqueCmds, preexist[i, ])
+             }
+        }
+        uniqueCmds$order <- as.factor(uniqueCmds$order)
+        joinedTimes <- left_join(
+                        mutate(new, order=factor(order, levels=combined)),
+                        mutate(rebuiltPre,
+                               order=factor(order, levels=combined)),
+                        by=c("cmd","order"))
+        joinedTimes <- suppressWarnings(full_join(
+                        joinedTimes, uniqueCmds,by=c("cmd","order")))
+        joinedTimes$order <- as.factor(rep(1:nrow(joinedTimes)))
+        return(joinedTimes)
+    } else {
+        #message("D")
+        # Both data.frames are the same length but contain different cmds
+        rebuiltNew  <- data.frame(cmd=preexist$cmd,
+                                  time=rep(0, nrow(preexist)),
+                                  order=rep(1:nrow(preexist)),
+                                  stringsAsFactors=FALSE)
+        uniqueCmds  <- data.frame(cmd=as.character(), time=as.numeric(),
+                                  order=as.numeric(), stringsAsFactors=FALSE)
+        for (i in 1:nrow(new)) {
+             if (new$cmd[i] %in% preexist$cmd) {
+                rowPos <- grep(new$cmd[i], preexist$cmd)
+                rebuiltNew[rowPos, ] <- data.frame(cmd=new$cmd[i],
+                                                   time=new$time[i],
+                                                   order=rowPos,
+                                                   stringsAsFactors=FALSE)
+             } else {
+                uniqueCmds <- rbind(uniqueCmds, new[i, ])
+             }
+        }
+        uniqueCmds$order <- as.factor(uniqueCmds$order)
+        joinedTimes <- left_join(
+                        mutate(preexist, order=factor(order, levels=combined)),
+                        mutate(rebuiltNew,
+                               order=factor(order, levels=combined)),
+                        by=c("cmd","order"))
+        joinedTimes <- suppressWarnings(full_join(
+                        joinedTimes, uniqueCmds,by=c("cmd","order")))
+        joinedTimes$order <- as.factor(rep(1:nrow(joinedTimes)))
+        return(joinedTimes)
+    }  
+}
+
 ###############################################################################
 ####                               OPEN FILE                               ####
 ###############################################################################
@@ -313,20 +416,22 @@ for (i in 1:numSamples) {
     invisible(capture.output(sampleName <- samples(prj)$sample_name[i]))
     timeFile        <- Sys.glob(file.path(outputDir, "results_pipeline",
                                           sampleName, "*_profile.tsv"))
-    write(paste("Plotting runtime: ", sampleName, sep=""), stdout())
-    combinedTime    <- getRuntime(timeFile, sampleName)
-    if (i == 1) {
-        accumulated <- combinedTime
+    if (length(timeFile) != 0) {
+        write(paste("Plotting runtime: ", sampleName, sep=""), stdout())
+        combinedTime    <- getRuntime(timeFile, sampleName)
+        if (nrow(accumulated) == 0) {
+            accumulated <- combinedTime
+        } else {
+            accumulated <- joinTimes(combinedTime, accumulated)
+        }
     } else {
-        # accumulated <- full_join(subset(accumulated, select=-c(order)),
-                                 # subset(combinedTime, select=-c(order)),
-                                 # by=c("cmd"))
-        #accumulated <- full_join(accumulated, combinedTime, by=c("cmd"))
-        accumulated <- suppressWarnings(merge(accumulated, combinedTime, by=c("cmd", "order"), all=TRUE))
+        write(paste("Could not find the profile.tsv file for \'", sampleName,
+                    "\' at location:", file.path(outputDir, "results_pipeline",
+                                               sampleName), sep=""), stdout())
     }
 }
 
-accumulated <- accumulated[order(accumulated$order), ]
+accumulated <- accumulated[order(as.numeric(row.names(accumulated))), ]
 accumulated <- subset(accumulated, select=-c(order))
 final       <- data.frame(cmd=as.character(), average_time=as.numeric())
 if (nrow(accumulated) == 0) {

From 1c97d3d277043019806dc76698e0c5706803d7f1 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 3 Jul 2018 09:04:39 -0400
Subject: [PATCH 30/35] Update comment in new function

---
 looper_runtime_plot.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 9f9f7723f..939321404 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -298,8 +298,8 @@ joinTimes = function (new, preexist) {
         return (preexist)
     } else if (length(new$cmd) < length(preexist$cmd)) {
         #message("B")
-        # The to-be-added data.frame contains less total commands, but they
-        # are all present in the pre-existing data.frame
+        # The to-be-added data.frame contains fewer commands than the
+        # pre-existing data.frame
         rebuiltNew  <- data.frame(cmd=preexist$cmd,
                                   time=rep(0, nrow(preexist)),
                                   order=rep(1:nrow(preexist)),
@@ -327,8 +327,8 @@ joinTimes = function (new, preexist) {
         return(joinedTimes)
     } else if (length(new$cmd) > length(preexist$cmd)) {
         #message("C")
-        # The to-be-added data.frame contains more total commands but they
-        # include all the pre-existing commands
+        # The to-be-added data.frame contains more commands than are present
+        # in the pre-existing data.frame
         rebuiltPre  <- data.frame(cmd=new$cmd,
                                   time=rep(0, nrow(new)),
                                   order=rep(1:nrow(new)),

From 36436d7145bba8d75243c9f78b6af2afa4b58b1f Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Tue, 3 Jul 2018 09:06:47 -0400
Subject: [PATCH 31/35] Update header comments

---
 looper_runtime_plot.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R
index 939321404..67135b214 100755
--- a/looper_runtime_plot.R
+++ b/looper_runtime_plot.R
@@ -1,7 +1,7 @@
 #! /usr/bin/env Rscript
 ###############################################################################
 #06/04/18
-#Last Updated 06/27/18
+#Last Updated 07/03/18
 #Original Author: Jason Smith
 #looper_runtime_plot.R
 #

From 0bd8455605cfd8ff7108c6de47c700d625fed67d Mon Sep 17 00:00:00 2001
From: nsheff <nsheff@users.noreply.github.com>
Date: Tue, 3 Jul 2018 14:16:15 -0400
Subject: [PATCH 32/35] small note re containers to changelog

---
 doc/source/changelog.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst
index 34e2a4a3e..29fd43169 100644
--- a/doc/source/changelog.rst
+++ b/doc/source/changelog.rst
@@ -15,7 +15,7 @@ Changelog
 
     - Add ``allow-duplicate-names`` command-line options
 
-    - Allow any variables in environment config files or other ``compute`` sections to be used in submission templates
+    - Allow any variables in environment config files or other ``compute`` sections to be used in submission templates. This allows looper to be used with containers.
 
     - Add nice universal project-level HTML reporting
 

From ea159696c33ae41c69eb8142c4ebf58def4f30c4 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 6 Jul 2018 09:47:04 -0400
Subject: [PATCH 33/35] Update bootstrap CDN

---
 looper/html_reports.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index a10b9aa5a..9f276c6c6 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -36,7 +36,7 @@
          * Copyright 2011-2018 Twitter, Inc.
          * Licensed under MIT (https://github.com/twbs/bootstrap/blob/master/LICENSE)
          -->
-        <link rel="stylesheet" href="https://bootswatch.com/4/flatly/bootstrap.min.css">
+        <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootswatch/4.1.1/flatly/bootstrap.min.css">
 """
 HTML_TITLE = \
 """\

From 7636335ae916854eba2d8cff13bce7b6e8571bf8 Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Fri, 6 Jul 2018 10:03:37 -0400
Subject: [PATCH 34/35] Update project object caption

---
 looper/html_reports.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 9f276c6c6..3b527212c 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -1175,7 +1175,7 @@ def create_project_objects():
                                     obj_figs.append(HTML_FIGURE.format(
                                         path=file_relpath,
                                         image=img_relpath,
-                                        label=caption))
+                                        label='{}: Click to see full-size figure'.format(caption)))
                                     num_figures += 1
                                 # Close the previous row and start a new one
                                 else:
@@ -1185,12 +1185,11 @@ def create_project_objects():
                                     obj_figs.append(HTML_FIGURE.format(
                                         path=file_relpath,
                                         image=img_relpath,
-                                        label=caption))
+                                        label='{}: Click to see full-size figure'.format(caption)))
                             # No thumbnail exists, add as a link in a list
                             else:
                                 obj_links.append(OBJECTS_LINK.format(
-                                                    path=file_relpath,
-                                                    label=caption))
+                                    path=file_relpath, label='{}: Click to see full-size figure'.format(caption)))
                         else:
                             warnings.append(caption)
 

From 96598d7ac48837100244c70f1890a49353c8654e Mon Sep 17 00:00:00 2001
From: jpsmith5 <jasonsmith@virginia.edu>
Date: Mon, 9 Jul 2018 12:57:28 -0400
Subject: [PATCH 35/35] Report peak memory use to status page; Use log file for
 elapsed runtime instead of stats.tsv

---
 looper/html_reports.py | 56 ++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 13 deletions(-)

diff --git a/looper/html_reports.py b/looper/html_reports.py
index 3b527212c..fb13adfbe 100644
--- a/looper/html_reports.py
+++ b/looper/html_reports.py
@@ -481,6 +481,7 @@
                 <th>Status</th>
                 <th>Log file</th>
                 <th>Runtime</th>
+                <th>Peak memory use</th>
               </thead>
               <tbody>
 """
@@ -916,7 +917,7 @@ def create_status_html(all_samples):
                 html_file.write(STATUS_TABLE_HEAD)
                 # Alert user if the stats_summary.tsv is incomplete
                 # Likely indicates pipeline is still running
-                stats_warning = False
+                status_warning = False
                 # Alert user to samples that are included in the project
                 # but have not been run
                 sample_warning = []
@@ -999,26 +1000,54 @@ def create_status_html(all_samples):
                                                 file_link="",
                                                 link_name=""))
                         # Fourth Col: Sample runtime (if completed)
-                        # If Completed, use stats.tsv
-                        stats_file = os.path.join(
-                                        self.prj.metadata.results_subdir,
-                                        sample_name, "stats.tsv")
-                        if os.path.isfile(stats_file):
-                            t = _pd.read_table(stats_file, header=None,
-                                               names=['key', 'value', 'pl'])
-                            t.drop_duplicates(subset=['key', 'pl'],
-                                              keep='last', inplace=True)
+                        # Use stats.tsv <deprecated>
+                        # * Use log_file instead
+                        # stats_file = os.path.join(
+                                        # self.prj.metadata.results_subdir,
+                                        # sample_name, "stats.tsv")
+                        if os.path.isfile(log_file):
+                            # t = _pd.read_table(stats_file, header=None,
+                                               # names=['key', 'value', 'pl'])
+                            # t.drop_duplicates(subset=['key', 'pl'],
+                                              # keep='last', inplace=True)
+                            # alternate method: better to use log_file?
+                            t = _pd.read_table(log_file, header=None,
+                                                  names=['key', 'value'])
+                            t.drop_duplicates(subset=['value'], keep='last',
+                                                 inplace=True)
+                            t['key'] = t['key'].str.replace('> `', '')
+                            t['key'] = t['key'].str.replace('`', '')
                             try:
                                 time = str(t[t['key'] == 'Time'].iloc[0]['value'])
                                 html_file.write(STATUS_ROW_VALUE.format(
                                                 row_class="",
                                                 value=str(time)))
                             except IndexError:
-                                stats_warning = True                       
+                                status_warning = True                       
                         else:
                             html_file.write(STATUS_ROW_VALUE.format(
                                                 row_class=button_class,
                                                 value="Unknown"))
+                        # Fifth Col: Sample peak memory use (if completed)
+                        # Use *_log.md file
+                        if os.path.isfile(log_file):
+                            m = _pd.read_table(log_file, header=None, sep=':',
+                                               names=['key', 'value'])
+                            m.drop_duplicates(subset=['value'], keep='last',
+                                              inplace=True)
+                            m['key'] = m['key'].str.replace('*', '')
+                            m['key'] = m['key'].str.replace('^\s+', '')
+                            try:
+                                mem = str(m[m['key'] == 'Peak memory used'].iloc[0]['value'])
+                                html_file.write(STATUS_ROW_VALUE.format(
+                                                row_class="",
+                                                value=mem.replace(' ', '')))
+                            except IndexError:
+                                status_warning = True                       
+                        else:
+                            html_file.write(STATUS_ROW_VALUE.format(
+                                                row_class=button_class,
+                                                value="NA"))
                         html_file.write(STATUS_ROW_FOOTER)
                     else:
                         # Sample was not run through the pipeline
@@ -1030,8 +1059,9 @@ def create_status_html(all_samples):
                 html_file.close()
                 
                 # Alert the user to any warnings generated
-                if stats_warning:
-                    _LOGGER.warn("The stats_summary.tsv file is incomplete")
+                if status_warning:
+                    _LOGGER.warn("The pipeline is still running..." +
+                                 "Unable to complete Status.html")
                 if sample_warning:
                     if len(sample_warning)==1:
                         _LOGGER.warn("{} is not present in {}".format(