From 2ccb32c0e751c56b734fcba6f513e13b30626b03 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Tue, 26 Jun 2018 10:05:55 -0400 Subject: [PATCH 01/35] Eliminate redundancy in create_object_html usage --- looper/html_reports.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index 5cc6e92e5..dd64a6ea9 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -576,7 +576,7 @@ class HTMLReportBuilder(): def __init__(self, prj): """ - The Project defines the instance; establish an iteration counter. + The Project defines the instance. :param Project prj: Project with which to work/operate on """ @@ -632,23 +632,27 @@ def create_sample_parent_html(objs): html_file.write(HTML_FOOTER) html_file.close() - def create_object_html(objs, nb, type, filename, index_html): + def create_object_html(single_object, all_objects): # Generates a page for an individual object type with all of its # plots from each sample reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") - object_path = os.path.join(reports_dir, - filename.replace(' ', '_').lower()) + # Generate object type and filename + type = single_object['key'].drop_duplicates() + filename = str(type) + ".html" + object_path = os.path.join( + reports_dir, filename.replace(' ', '_').lower()) if not os.path.exists(os.path.dirname(object_path)): os.makedirs(os.path.dirname(object_path)) with open(object_path, 'w') as html_file: html_file.write(HTML_HEAD_OPEN) - html_file.write(create_navbar(nb, reports_dir)) + html_file.write(create_navbar(all_objects, reports_dir)) html_file.write(HTML_HEAD_CLOSE) + html_file.write("\t\t

{} objects

\n".format(str(type))) links = [] figures = [] warnings = [] - for i, row in objs.iterrows(): + for i, row in single_object.iterrows(): page_path = os.path.join( self.prj.metadata.results_subdir, row['sample_name'], row['filename']) @@ -690,7 +694,8 @@ def create_object_html(objs, nb, type, filename, index_html): html_file.close() if warnings: - _LOGGER.warn("Warning: " + filename.replace(' ', '_').lower() + + _LOGGER.warn("create_object_html: " + + filename.replace(' ', '_').lower() + " references nonexistent object files") _LOGGER.debug(filename.replace(' ', '_').lower() + " nonexistent files: " + @@ -1230,10 +1235,8 @@ def create_index_html(objs, stats): # Create objects pages for key in objs['key'].drop_duplicates().sort_values(): - objects = objs[objs['key'] == key] - object_filename = str(key) + ".html" - create_object_html( - objects, objs, key, object_filename, objs_html_path) + single_object = objs[objs['key'] == key] + create_object_html(single_object, objs) # Create parent objects page with links to each object type create_object_parent_html(objs) From 33d7d2c472172438bebf6a82c74a40c0070a76b5 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Tue, 26 Jun 2018 10:11:29 -0400 Subject: [PATCH 02/35] Improve variable naming; eliminate unused var in create_sample_html_html --- looper/html_reports.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index dd64a6ea9..7f801b1ec 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -585,7 +585,7 @@ def __init__(self, prj): def __call__(self, objs, stats): - def create_object_parent_html(objs): + def create_object_parent_html(all_objects): # Generates a page listing all the project objects with links # to individual object pages reports_dir = os.path.join(self.prj.metadata.output_dir, @@ -595,11 +595,11 @@ def create_object_parent_html(objs): os.makedirs(os.path.dirname(object_parent_path)) with open(object_parent_path, 'w') as html_file: html_file.write(HTML_HEAD_OPEN) - html_file.write(create_navbar(objs, reports_dir)) + html_file.write(create_navbar(all_objects, reports_dir)) html_file.write(HTML_HEAD_CLOSE) html_file.write(GENERIC_HEADER.format(header="Objects")) html_file.write(GENERIC_LIST_HEADER) - for key in objs['key'].drop_duplicates().sort_values(): + for key in all_objects['key'].drop_duplicates().sort_values(): page_name = key + ".html" page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower()) page_relpath = os.path.relpath(page_path, reports_dir) @@ -608,7 +608,7 @@ def create_object_parent_html(objs): html_file.write(HTML_FOOTER) html_file.close() - def create_sample_parent_html(objs): + def create_sample_parent_html(all_samples): # Generates a page listing all the project samples with links # to individual sample pages reports_dir = os.path.join(self.prj.metadata.output_dir, @@ -618,7 +618,7 @@ def create_sample_parent_html(objs): os.makedirs(os.path.dirname(sample_parent_path)) with open(sample_parent_path, 'w') as html_file: html_file.write(HTML_HEAD_OPEN) - html_file.write(create_navbar(objs, reports_dir)) + html_file.write(create_navbar(all_samples, reports_dir)) html_file.write(HTML_HEAD_CLOSE) html_file.write(GENERIC_HEADER.format(header="Samples")) html_file.write(GENERIC_LIST_HEADER) @@ -820,8 +820,7 @@ def create_status_html(all_samples): if warning: _LOGGER.warn("The stats_summary.tsv file is incomplete") - def create_sample_html(all_samples, sample_name, sample_stats, - index_html): + def create_sample_html(all_samples, sample_name, sample_stats): # Produce an HTML page containing all of a sample's objects # and the sample summary statistics reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") @@ -1213,9 +1212,9 @@ def create_index_html(objs, stats): for value in table_row: if value == sample_name: # Generate individual sample page and return link - sample_page = create_sample_html( - objs, sample_name, - stats[sample_pos], objs_html_path) + sample_page = create_sample_html(objs, + sample_name, + stats[sample_pos]) # Treat sample_name as a link to sample page objs_html_file.write(TABLE_ROWS_LINK.format( html_page=sample_page, From b8adf48e5327c666213b53405c955ee5f6399c26 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Tue, 26 Jun 2018 10:48:32 -0400 Subject: [PATCH 03/35] Improve commenting and readability --- looper/html_reports.py | 367 +++++++++++++++++++++++------------------ 1 file changed, 204 insertions(+), 163 deletions(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index 7f801b1ec..7abf5333d 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -8,7 +8,7 @@ from peppy.utils import alpha_cased _LOGGER = logging.getLogger('HTMLReportBuilder') - + __author__ = "Jason Smith" __email__ = "jasonsmith@virginia.edu" @@ -75,7 +75,6 @@ """ - HTML_VARS = ["HTML_HEAD_OPEN", "HTML_TITLE", "HTML_HEAD_CLOSE", "HTML_BUTTON", "HTML_FIGURE", "HTML_FOOTER"] @@ -177,7 +176,6 @@ """ - NAVBAR_VARS = ["HTML_NAVBAR_STYLE_BASIC", "HTML_NAVBAR_BASIC", "NAVBAR_HEADER", "NAVBAR_LOGO", "NAVBAR_DROPDOWN_HEADER", "NAVBAR_DROPDOWN_LINK", "NAVBAR_DROPDOWN_DIVIDER", "NAVBAR_DROPDOWN_FOOTER", @@ -192,17 +190,14 @@ """\
    """ - GENERIC_LIST_ENTRY = \ """\
  • {label}
  • """ - GENERIC_LIST_FOOTER = \ """
""" - GENERIC_VARS = ["HTML_HEAD_OPEN", "HTML_TITLE", "HTML_HEAD_CLOSE", "HTML_FOOTER", "GENERIC_HEADER", "GENERIC_LIST_HEADER", "GENERIC_LIST_ENTRY", "GENERIC_LIST_FOOTER"] @@ -315,12 +310,10 @@ """ - TABLE_COLS = \ """\
{col_val}
""" - TABLE_COLS_FOOTER = \ """\ @@ -331,7 +324,6 @@ """\ """ - TABLE_ROWS = \ """\ {row_val} @@ -340,19 +332,16 @@ """\ """ - TABLE_FOOTER = \ """\ """ - TABLE_ROWS_LINK = \ """\ {link_name} """ - LINKS_STYLE_BASIC = \ """ a.LN1 { @@ -458,7 +447,6 @@ vertical-align: middle; } """ - SAMPLE_PLOTS = \ """\
@@ -466,7 +454,6 @@
'{label}'
""" - SAMPLE_FOOTER = \ """

Return to summary page

@@ -526,11 +513,10 @@
""" - STATUS_VARS = ["STATUS_HEADER", "STATUS_TABLE_HEAD", "STATUS_BUTTON", "STATUS_ROW_HEADER", "STATUS_ROW_VALUE", "STATUS_ROW_LINK", "STATUS_ROW_FOOTER", "STATUS_FOOTER"] - + # Objects-page-related OBJECTS_HEADER = \ """\ @@ -570,7 +556,7 @@ __all__ = HTML_VARS + NAVBAR_VARS + GENERIC_VARS + \ TABLE_VARS + SAMPLE_VARS + STATUS_VARS + OBJECTS_VARS - + class HTMLReportBuilder(): """ Generate HTML summary report for project/samples """ @@ -582,12 +568,19 @@ def __init__(self, prj): """ super(HTMLReportBuilder, self).__init__() self.prj = prj - + def __call__(self, objs, stats): + """ Do the work of the subcommand/program. """ def create_object_parent_html(all_objects): - # Generates a page listing all the project objects with links - # to individual object pages + """ + Generates a page listing all the project objects with links + to individual object pages + + :param panda.DataFrame all_objects: project level dataframe + containing any reported objects for all samples + """ + reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") object_parent_path = os.path.join(reports_dir, "objects.html") @@ -609,8 +602,14 @@ def create_object_parent_html(all_objects): html_file.close() def create_sample_parent_html(all_samples): - # Generates a page listing all the project samples with links - # to individual sample pages + """ + Generates a page listing all the project samples with links + to individual sample pages + + :param panda.DataFrame all_samples: project level dataframe + containing any reported objects for all samples + """ + reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") sample_parent_path = os.path.join(reports_dir, "samples.html") @@ -633,8 +632,16 @@ def create_sample_parent_html(all_samples): html_file.close() def create_object_html(single_object, all_objects): - # Generates a page for an individual object type with all of its - # plots from each sample + """ + Generates a page for an individual object type with all of its + plots from each sample + + :param panda.DataFrame single_object: contains reference + information for an individual object type for all samples + :param panda.DataFrame all_objects: project level dataframe + containing any reported objects for all samples + """ + reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") # Generate object type and filename type = single_object['key'].drop_duplicates() @@ -701,128 +708,17 @@ def create_object_html(single_object, all_objects): " nonexistent files: " + ','.join(str(file) for file in warnings)) - def create_status_html(all_samples): - # Generates a page listing all the samples, their run status, their - # log file, and the total runtime if completed. - reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") - status_html_path = os.path.join(reports_dir, "status.html") - if not os.path.exists(os.path.dirname(status_html_path)): - os.makedirs(os.path.dirname(status_html_path)) - with open(status_html_path, 'w') as html_file: - html_file.write(HTML_HEAD_OPEN) - html_file.write(create_navbar(all_samples, reports_dir)) - html_file.write(HTML_HEAD_CLOSE) - html_file.write(STATUS_HEADER) - html_file.write(STATUS_TABLE_HEAD) - warning = False - for sample in self.prj.samples: - sample_name = str(sample.sample_name) - # Grab the status flag for the current sample - flag = glob.glob(os.path.join( - self.prj.metadata.results_subdir, - sample_name, '*.flag')) - if not flag: - button_class = "table-danger" - flag = "Missing" - _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name)) - elif len(flag) > 1: - button_class = "table-warning" - flag = "Multiple" - _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name)) - else: - if "completed" in str(flag): - button_class = "table-success" - flag = "Completed" - elif "running" in str(flag): - button_class = "table-warning" - flag = "Running" - elif "failed" in str(flag): - button_class = "table-danger" - flag = "Failed" - else: - button_class = "table-secondary" - flag = "Unknown" - - # Create table entry for each sample - html_file.write(STATUS_ROW_HEADER) - # First Col: Sample_Name (w/ link to sample page) - page_name = sample_name + ".html" - page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower()) - page_relpath = os.path.relpath(page_path, reports_dir) - html_file.write(STATUS_ROW_LINK.format( - row_class="", - file_link=page_relpath, - link_name=sample_name)) - # Second Col: Status (color-coded) - html_file.write(STATUS_ROW_VALUE.format( - row_class=button_class, - value=flag)) - # Third Col: Log File (w/ link to file) - single_sample = all_samples[all_samples['sample_name'] == sample_name] - if single_sample.empty: - # When there is no objects.tsv file, search for the - # presence of log, profile, and command files - log_name = os.path.basename(str(glob.glob(os.path.join( - self.prj.metadata.results_subdir, - sample_name, '*log.md'))[0])) - # Currently unused. Future? - # profile_name = os.path.basename(str(glob.glob(os.path.join( - # self.prj.metadata.results_subdir, - # sample_name, '*profile.tsv'))[0])) - # command_name = os.path.basename(str(glob.glob(os.path.join( - # self.prj.metadata.results_subdir, - # sample_name, '*commands.sh'))[0])) - else: - log_name = str(single_sample.iloc[0]['annotation']) + "_log.md" - # Currently unused. Future? - # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv" - # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh" - log_file = os.path.join(self.prj.metadata.results_subdir, - sample_name, log_name) - log_relpath = os.path.relpath(log_file, reports_dir) - if os.path.isfile(log_file): - html_file.write(STATUS_ROW_LINK.format( - row_class="", - file_link=log_relpath, - link_name=log_name)) - else: - # Leave cell empty - html_file.write(STATUS_ROW_LINK.format( - row_class="", - file_link="", - link_name="")) - # Fourth Col: Sample runtime (if completed) - # If Completed, use stats.tsv - stats_file = os.path.join( - self.prj.metadata.results_subdir, - sample_name, "stats.tsv") - if os.path.isfile(stats_file): - t = _pd.read_table(stats_file, header=None, - names=['key', 'value', 'pl']) - t.drop_duplicates(subset=['key', 'pl'], - keep='last', inplace=True) - try: - time = str(t[t['key'] == 'Time'].iloc[0]['value']) - html_file.write(STATUS_ROW_VALUE.format( - row_class="", - value=str(time))) - except IndexError: - warning = True - else: - html_file.write(STATUS_ROW_VALUE.format( - row_class=button_class, - value="Unknown")) - html_file.write(STATUS_ROW_FOOTER) - - html_file.write(STATUS_FOOTER) - html_file.write(HTML_FOOTER) - html_file.close() - if warning: - _LOGGER.warn("The stats_summary.tsv file is incomplete") - def create_sample_html(all_samples, sample_name, sample_stats): - # Produce an HTML page containing all of a sample's objects - # and the sample summary statistics + """ + Produce an HTML page containing all of a sample's objects + and the sample summary statistics + + :param panda.DataFrame all_samples: project level dataframe + containing any reported objects for all samples + :param str sample_name: the name of the current sample + :param list stats: pipeline run statistics for the current sample + """ + reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") html_filename = sample_name + ".html" html_page = os.path.join( @@ -983,17 +879,154 @@ def create_sample_html(all_samples, sample_name, sample_stats): # Return the path to the newly created sample page return sample_page_relpath + def create_status_html(all_samples): + """ + Generates a page listing all the samples, their run status, their + log file, and the total runtime if completed. + + :param panda.DataFrame all_samples: project level dataframe + containing any reported objects for all samples + """ + + reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") + status_html_path = os.path.join(reports_dir, "status.html") + if not os.path.exists(os.path.dirname(status_html_path)): + os.makedirs(os.path.dirname(status_html_path)) + with open(status_html_path, 'w') as html_file: + html_file.write(HTML_HEAD_OPEN) + html_file.write(create_navbar(all_samples, reports_dir)) + html_file.write(HTML_HEAD_CLOSE) + html_file.write(STATUS_HEADER) + html_file.write(STATUS_TABLE_HEAD) + warning = False + for sample in self.prj.samples: + sample_name = str(sample.sample_name) + # Grab the status flag for the current sample + flag = glob.glob(os.path.join( + self.prj.metadata.results_subdir, + sample_name, '*.flag')) + if not flag: + button_class = "table-danger" + flag = "Missing" + _LOGGER.warn("create_status_html: No flag file found for {}".format(sample_name)) + elif len(flag) > 1: + button_class = "table-warning" + flag = "Multiple" + _LOGGER.warn("create_status_html: Multiple flag files found for {}".format(sample_name)) + else: + if "completed" in str(flag): + button_class = "table-success" + flag = "Completed" + elif "running" in str(flag): + button_class = "table-warning" + flag = "Running" + elif "failed" in str(flag): + button_class = "table-danger" + flag = "Failed" + else: + button_class = "table-secondary" + flag = "Unknown" + + # Create table entry for each sample + html_file.write(STATUS_ROW_HEADER) + # First Col: Sample_Name (w/ link to sample page) + page_name = sample_name + ".html" + page_path = os.path.join(reports_dir, page_name.replace(' ', '_').lower()) + page_relpath = os.path.relpath(page_path, reports_dir) + html_file.write(STATUS_ROW_LINK.format( + row_class="", + file_link=page_relpath, + link_name=sample_name)) + # Second Col: Status (color-coded) + html_file.write(STATUS_ROW_VALUE.format( + row_class=button_class, + value=flag)) + # Third Col: Log File (w/ link to file) + single_sample = all_samples[all_samples['sample_name'] == sample_name] + if single_sample.empty: + # When there is no objects.tsv file, search for the + # presence of log, profile, and command files + log_name = os.path.basename(str(glob.glob(os.path.join( + self.prj.metadata.results_subdir, + sample_name, '*log.md'))[0])) + # Currently unused. Future? + # profile_name = os.path.basename(str(glob.glob(os.path.join( + # self.prj.metadata.results_subdir, + # sample_name, '*profile.tsv'))[0])) + # command_name = os.path.basename(str(glob.glob(os.path.join( + # self.prj.metadata.results_subdir, + # sample_name, '*commands.sh'))[0])) + else: + log_name = str(single_sample.iloc[0]['annotation']) + "_log.md" + # Currently unused. Future? + # profile_name = str(single_sample.iloc[0]['annotation']) + "_profile.tsv" + # command_name = str(single_sample.iloc[0]['annotation']) + "_commands.sh" + log_file = os.path.join(self.prj.metadata.results_subdir, + sample_name, log_name) + log_relpath = os.path.relpath(log_file, reports_dir) + if os.path.isfile(log_file): + html_file.write(STATUS_ROW_LINK.format( + row_class="", + file_link=log_relpath, + link_name=log_name)) + else: + # Leave cell empty + html_file.write(STATUS_ROW_LINK.format( + row_class="", + file_link="", + link_name="")) + # Fourth Col: Sample runtime (if completed) + # If Completed, use stats.tsv + stats_file = os.path.join( + self.prj.metadata.results_subdir, + sample_name, "stats.tsv") + if os.path.isfile(stats_file): + t = _pd.read_table(stats_file, header=None, + names=['key', 'value', 'pl']) + t.drop_duplicates(subset=['key', 'pl'], + keep='last', inplace=True) + try: + time = str(t[t['key'] == 'Time'].iloc[0]['value']) + html_file.write(STATUS_ROW_VALUE.format( + row_class="", + value=str(time))) + except IndexError: + warning = True + else: + html_file.write(STATUS_ROW_VALUE.format( + row_class=button_class, + value="Unknown")) + html_file.write(STATUS_ROW_FOOTER) + + html_file.write(STATUS_FOOTER) + html_file.write(HTML_FOOTER) + html_file.close() + if warning: + _LOGGER.warn("The stats_summary.tsv file is incomplete") + def create_navbar(objs, wd): - # Return a string containing the navbar prebuilt html - # Includes link to all the pages - objs_html_path = "{root}_summary.html".format( + """ + Return a string containing the navbar prebuilt html. + Generates links to each page relative to the directory + of interest. + + :param pandas.DataFrame objs: project results dataframe containing + sample or object data + :param path wd: the working directory of the current HTML page + being generated, enables navbar links relative to page + """ + + # Generate full index.html path + index_html_path = "{root}_summary.html".format( root=os.path.join(self.prj.metadata.output_dir, self.prj.name)) reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") - index_page_relpath = os.path.relpath(objs_html_path, wd) + # Generate index.html path relative to the HTML file under + # construction + index_page_relpath = os.path.relpath(index_html_path, wd) navbar_header = NAVBAR_HEADER.format(logo=NAVBAR_LOGO, index_html=index_page_relpath) - # Add link to STATUS page + # Add link to status.html page status_page = os.path.join(reports_dir, "status.html") # Use relative linking structure relpath = os.path.relpath(status_page, wd) @@ -1061,8 +1094,8 @@ def create_navbar(objs, wd): NAVBAR_FOOTER])) def create_project_objects(): - # If a protocol produces project level summaries add those as - # additional figures/links + """ Add project level summaries as additional figures/links """ + all_protocols = [sample.protocol for sample in self.prj.samples] # For each protocol report the project summarizers' results for protocol in set(all_protocols): @@ -1141,19 +1174,27 @@ def create_project_objects(): OBJECTS_LIST_FOOTER])) def create_index_html(objs, stats): - # Generate an index.html style project home page w/ sample summary - # statistics + """ + Generate an index.html style project home page w/ sample summary + statistics + + :param panda.DataFrame objs: project level dataframe containing + any reported objects for all samples + :param list stats: a summary file of pipeline statistics for each + analyzed sample + """ + objs.drop_duplicates(keep='last', inplace=True) reports_dir = os.path.join(self.prj.metadata.output_dir, "reports") - # Generate parent index.html page - objs_html_path = "{root}_summary.html".format( + # Generate parent index.html page path + index_html_path = "{root}_summary.html".format( root=os.path.join(self.prj.metadata.output_dir, self.prj.name)) - # Generate parent objects.html page + # Generate parent objects.html page path object_parent_path = os.path.join(reports_dir, "objects.html") - # Generate parent samples.html page + # Generate parent samples.html page path sample_parent_path = os.path.join(reports_dir, "samples.html") - objs_html_file = open(objs_html_path, 'w') + objs_html_file = open(index_html_path, 'w') objs_html_file.write(HTML_HEAD_OPEN) objs_html_file.write("\t\t\n") html_file.write(create_navbar(all_samples, reports_dir)) html_file.write(HTML_HEAD_CLOSE) html_file.write(STATUS_HEADER) From 305c709e1b82fed54c7bfea976f27321fd1321d6 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Wed, 27 Jun 2018 08:41:23 -0400 Subject: [PATCH 11/35] Fix sample page table width --- looper/html_reports.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index 86ff30afc..40ed482f6 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -280,7 +280,7 @@ .table td.text { max-width: 150px; - padding: 0px 4px 0px 4px; + padding: 0px 2px 0px 2px; } .table td.text span { white-space: nowrap; @@ -424,9 +424,9 @@ SAMPLE_TABLE_STYLE = \ """\ .table td.text { - max-width: 50%; + max-width: 500px; - padding: 0px 0px 0px 0px; + padding: 0px 2px 0px 2px; } .table td.text span { white-space: nowrap; From 3610dc74241bc98f34573319a20b4456ccfdc5b7 Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Wed, 27 Jun 2018 10:41:08 -0400 Subject: [PATCH 12/35] Fix profile.tsv file location determination --- looper_runtime_plot.R | 67 +++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 24 deletions(-) diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R index 680b7b35e..97eb5ce13 100755 --- a/looper_runtime_plot.R +++ b/looper_runtime_plot.R @@ -1,7 +1,7 @@ #! /usr/bin/env Rscript ############################################################################### #06/04/18 -#Last Updated 06/21/18 +#Last Updated 06/27/18 #Original Author: Jason Smith #looper_runtime_plot.R # @@ -18,10 +18,25 @@ #### DEPENDENCIES #### ############################################################################### ##### LOAD ARGUMENTPARSER ##### -if(suppressPackageStartupMessages(!require(argparser))) { - install.packages("argparser") +loadLibrary <- tryCatch ( + { + suppressWarnings(suppressPackageStartupMessages(library(argparser))) + }, + error=function(e) { + message("Error: Install the \"argparser\"", + " library before proceeding.") + return(NULL) + }, + warning=function(e) { + message(e) + return(TRUE) + } +) +if (length(loadLibrary)!=0) { + suppressWarnings(library(argparser)) +} else { + quit() } -suppressPackageStartupMessages(library(argparser, quietly=TRUE)) # Create a parser p <- arg_parser("Produce an ATACseq pipeline (PEPATAC) runtime plot") @@ -37,25 +52,29 @@ p <- add_argument(p, "config", argv <- parse_args(p) ##### LOAD ADDITIONAL DEPENDENCIES ##### -warnSetting <- getOption("warn") -options(warn = -1) -if(suppressPackageStartupMessages(!require(ggplot2))) { - install.packages("ggplot2") -} -if(suppressPackageStartupMessages(!require(grid))) { - install.packages("grid") -} -if(suppressPackageStartupMessages(!require(stringr))) { - install.packages("stringr") -} -if(suppressPackageStartupMessages(!require(pepr))) { - devtools::install_github("pepkit/pepr") +required_libraries <- c("ggplot2", "grid", "stringr", "pepr") +for (i in required_libraries) { + loadLibrary <- tryCatch ( + { + suppressPackageStartupMessages( + suppressWarnings(library(i, character.only=TRUE))) + }, + error=function(e) { + message("Error: Install the \"", i, + "\" library before proceeding.") + return(NULL) + }, + warning=function(e) { + message(e) + return(1) + } + ) + if (length(loadLibrary)!=0) { + suppressWarnings(library(i, character.only=TRUE)) + } else { + quit() + } } -suppressPackageStartupMessages(library(ggplot2)) -suppressPackageStartupMessages(library(grid)) -suppressPackageStartupMessages(library(stringr)) -suppressPackageStartupMessages(library(pepr)) -options(warn = warnSetting) ############################################################################### #### FUNCTIONS #### @@ -251,8 +270,8 @@ invisible(capture.output(outputDir <- config(prj)$metadata$output_dir)) invisible(capture.output(numSamples <- length(samples(prj)$sample_name))) for (i in 1:numSamples) { invisible(capture.output(sampleName <- samples(prj)$sample_name[i])) - timeFile <- file.path(outputDir, "results_pipeline", - sampleName, "ATACseq_profile.tsv") + timeFile <- Sys.glob(file.path(outputDir, "results_pipeline", + sampleName, "*_profile.tsv")) plotRuntime(timeFile, sampleName) } From 11656d804e1e6f3c307012944fab9fe8de223dfe Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Wed, 27 Jun 2018 11:12:29 -0400 Subject: [PATCH 13/35] Report path to HTML report upon completion --- looper/html_reports.py | 57 +++++++++++++++++++++--------------------- looper/looper.py | 6 +++-- 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index 40ed482f6..6330305b2 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -1233,16 +1233,16 @@ def create_index_html(objs, stats): # Generate parent samples.html page path sample_parent_path = os.path.join(reports_dir, "samples.html") - objs_html_file = open(index_html_path, 'w') - objs_html_file.write(HTML_HEAD_OPEN) - objs_html_file.write("\t\t\n") - objs_html_file.write(HTML_TITLE.format(project_name=self.prj.name)) + index_html_file = open(index_html_path, 'w') + index_html_file.write(HTML_HEAD_OPEN) + index_html_file.write("\t\t\n") + index_html_file.write(HTML_TITLE.format(project_name=self.prj.name)) navbar = create_navbar(objs, self.prj.metadata.output_dir) - objs_html_file.write(navbar) - objs_html_file.write(HTML_HEAD_CLOSE) + index_html_file.write(navbar) + index_html_file.write(HTML_HEAD_CLOSE) # Add stats_summary.tsv button link tsv_outfile_path = os.path.join(self.prj.metadata.output_dir, @@ -1252,13 +1252,13 @@ def create_index_html(objs, stats): tsv_outfile_path += '_stats_summary.tsv' stats_relpath = os.path.relpath(tsv_outfile_path, self.prj.metadata.output_dir) - objs_html_file.write(HTML_BUTTON.format( + index_html_file.write(HTML_BUTTON.format( file_path=stats_relpath, label="Stats Summary File")) # Add stats summary table to index page and produce individual # sample pages if os.path.isfile(tsv_outfile_path): - objs_html_file.write(TABLE_HEADER) + index_html_file.write(TABLE_HEADER) # Produce table columns sample_pos = 0 # Get unique column name list @@ -1270,8 +1270,8 @@ def create_index_html(objs, stats): unique_columns = uniqify(col_names) # Write table column names to index.html file for key in unique_columns: - objs_html_file.write(TABLE_COLS.format(col_val=str(key))) - objs_html_file.write(TABLE_COLS_FOOTER) + index_html_file.write(TABLE_COLS.format(col_val=str(key))) + index_html_file.write(TABLE_COLS_FOOTER) # Produce table rows sample_pos = 0 @@ -1289,7 +1289,7 @@ def create_index_html(objs, stats): # Reset column position counter col_pos = 0 sample_name = str(stats[sample_pos]['sample_name']) - objs_html_file.write(TABLE_ROW_HEADER) + index_html_file.write(TABLE_ROW_HEADER) for value in table_row: if value == sample_name: # Generate individual sample page and return link @@ -1297,17 +1297,17 @@ def create_index_html(objs, stats): sample_name, stats[sample_pos]) # Treat sample_name as a link to sample page - objs_html_file.write(TABLE_ROWS_LINK.format( + index_html_file.write(TABLE_ROWS_LINK.format( html_page=sample_page, page_name=sample_page, link_name=sample_name)) # If not the sample name, add as an unlinked cell value else: - objs_html_file.write(TABLE_ROWS.format( + index_html_file.write(TABLE_ROWS.format( row_val=str(value))) - objs_html_file.write(TABLE_ROW_FOOTER) + index_html_file.write(TABLE_ROW_FOOTER) sample_pos += 1 - objs_html_file.write(TABLE_FOOTER) + index_html_file.write(TABLE_FOOTER) else: _LOGGER.warn("No stats file '%s'", stats_file) @@ -1327,20 +1327,21 @@ def create_index_html(objs, stats): # Add project level objects prj_objs = create_project_objects() - objs_html_file.write("\t\t
\n") - objs_html_file.write(prj_objs) - objs_html_file.write("\t\t
\n") + index_html_file.write("\t\t
\n") + index_html_file.write(prj_objs) + index_html_file.write("\t\t
\n") # Complete and close HTML file - objs_html_file.write(HTML_FOOTER) - objs_html_file.close() - - _LOGGER.info( - "Summary (n=" + str(len(stats)) + "): " + tsv_outfile_path) + index_html_file.write(HTML_FOOTER) + index_html_file.close() + + # Return the path to the completed index.html file + return index_html_path - _LOGGER.info("create_index_html") # Generate HTML report - create_index_html(objs, stats) + index_html_path = create_index_html(objs, stats) + return index_html_path + def uniqify(seq): """ Fast way to uniqify while preserving input order. """ diff --git a/looper/looper.py b/looper/looper.py index 7b5223826..d9c60a2fc 100755 --- a/looper/looper.py +++ b/looper/looper.py @@ -660,7 +660,7 @@ def __call__(self): _LOGGER.debug(iface) pl = iface.fetch_pipelines(protocol) summarizers = iface.get_attribute(pl, "summarizers") - for summarizer in summarizers: + for summarizer in set(summarizers): summarizer_abspath = os.path.join( os.path.dirname(iface.pipe_iface_file), summarizer) _LOGGER.debug([summarizer_abspath, self.prj.config_file]) @@ -671,7 +671,9 @@ def __call__(self): # Produce HTML report report_builder = HTMLReportBuilder(self.prj) - report_builder(objs, stats) + report_path = report_builder(objs, stats) + _LOGGER.info( + "HTML Report (n=" + str(len(stats)) + "): " + report_path) def aggregate_exec_skip_reasons(skip_reasons_sample_pairs): """ From d4e2ef1027f821e1afef0912cf8ce4f16361103e Mon Sep 17 00:00:00 2001 From: jpsmith5 Date: Wed, 27 Jun 2018 14:52:06 -0400 Subject: [PATCH 14/35] Produce an average runtime file --- looper_runtime_plot.R | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/looper_runtime_plot.R b/looper_runtime_plot.R index 97eb5ce13..bc90bbe64 100755 --- a/looper_runtime_plot.R +++ b/looper_runtime_plot.R @@ -12,7 +12,7 @@ #usage: Rscript /path/to/Rscript/looper_runtime_plot.R # /path/to/project_config.yaml # -#requirements: argparser, ggplot2, grid, stringr, pepr +#requirements: argparser, dplyr, ggplot2, grid, stringr, pepr # ############################################################################### #### DEPENDENCIES #### @@ -52,7 +52,7 @@ p <- add_argument(p, "config", argv <- parse_args(p) ##### LOAD ADDITIONAL DEPENDENCIES ##### -required_libraries <- c("ggplot2", "grid", "stringr", "pepr") +required_libraries <- c("dplyr", "ggplot2", "grid", "stringr", "pepr") for (i in required_libraries) { loadLibrary <- tryCatch ( { @@ -192,7 +192,7 @@ dedupSequential = function(dupDF) { } # Produce a runtime plot for a sample -plotRuntime = function(timeFile, sampleName) { +getRuntime = function(timeFile, sampleName) { # Get just the first line to get pipeline start time startTime <- readLines(timeFile, n=1) @@ -229,7 +229,7 @@ plotRuntime = function(timeFile, sampleName) { combinedTime$cmd <- as.character(combinedTime$cmd) # Set order for plotting purposes combinedTime$order <- as.factor(as.numeric(row.names(combinedTime))) - + # Create plot p <- ggplot(data=combinedTime, aes(x=order, y=time)) + geom_bar(stat="identity", position=position_dodge())+ @@ -253,6 +253,8 @@ plotRuntime = function(timeFile, sampleName) { file=buildFilePath(sampleName, "_Runtime.png", prj), width=unit(8,"inches"), height=unit(5.5,"inches")) + + return(combinedTime) } ############################################################################### @@ -266,13 +268,37 @@ prj = Project(configFile) #### MAIN #### ############################################################################### # For each sample in the project, produce a runtime summary plot +if (!is.null(config(prj)$name)) { + accumName <- file.path(config(prj)$metadata$output_dir, + paste(config(prj)$name, "average_runtime.csv", + sep="_")) +} else { + accumName <- file.path(config(prj)$metadata$output_dir, + "average_runtime.csv") +} invisible(capture.output(outputDir <- config(prj)$metadata$output_dir)) invisible(capture.output(numSamples <- length(samples(prj)$sample_name))) +accumulated <- data.frame(cmd=as.character(), time=as.numeric(), order=as.numeric()) for (i in 1:numSamples) { invisible(capture.output(sampleName <- samples(prj)$sample_name[i])) timeFile <- Sys.glob(file.path(outputDir, "results_pipeline", sampleName, "*_profile.tsv")) - plotRuntime(timeFile, sampleName) + combinedTime <- getRuntime(timeFile, sampleName) + if (i == 1) { + accumulated <- combinedTime + } else { + accumulated <- full_join(accumulated, combinedTime, by=c("cmd","order")) + } +} +accumulated <- accumulated[,-c(2,3)] +final <- data.frame(cmd=as.character(), average_time=as.numeric()) +for (i in 1:nrow(accumulated)) { + cmd <- accumulated$cmd[i] + tmp <- accumulated[,-1] + average_time <- as.numeric(sum(tmp[i,], na.rm=TRUE))/(ncol(tmp)-1) + average = data.frame(cbind(cmd, average_time)) + final <- rbind(final, average) } +write.csv(final, accumName, row.names=FALSE) write("Completed!\n", stdout()) \ No newline at end of file From 46d6983c5cb78b650e1ec52d9d862c6ba4dbc72f Mon Sep 17 00:00:00 2001 From: nsheff Date: Wed, 27 Jun 2018 15:50:05 -0400 Subject: [PATCH 15/35] increase logo size --- looper/html_reports.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/looper/html_reports.py b/looper/html_reports.py index 6330305b2..ca97ade34 100644 --- a/looper/html_reports.py +++ b/looper/html_reports.py @@ -83,7 +83,7 @@ """\