diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml index 7b4bf59f8..503a77742 100644 --- a/.github/workflows/run_tests.yaml +++ b/.github/workflows/run_tests.yaml @@ -1,4 +1,4 @@ -name: WDL Tests +name: Tests on: push: branches: @@ -10,7 +10,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true jobs: - validate: + validate_wdl: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -20,8 +20,8 @@ jobs: - name: Womtool Validate run: find . -name '*.wdl' | xargs -tI {} java -jar ${WOMTOOL_JAR} validate {} - test: - needs: validate + test_wdl: + needs: validate_wdl runs-on: ubuntu-latest permissions: contents: 'read' @@ -77,4 +77,38 @@ jobs: if: always() with: name: cromwell_logs - path: cromwell_logs/ \ No newline at end of file + path: cromwell_logs/ + + python_lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + - name: Lint with Ruff + run: | + pip install ruff + ruff check --output-format=github . 
+ + python_test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + - name: ScoreBGE Unit tests + run: | + pip install -r ImputationPipeline/ScoreBGE/requirements.txt + cd ImputationPipeline/ScoreBGE/tests + export PYTHONPATH=$GITHUB_WORKSPACE + python -m unittest test_ScoreBGE.py \ No newline at end of file diff --git a/BenchmarkSVs/SVisualizer/gather_terra_data.py b/BenchmarkSVs/SVisualizer/gather_terra_data.py index c73674ca5..f29d077f9 100644 --- a/BenchmarkSVs/SVisualizer/gather_terra_data.py +++ b/BenchmarkSVs/SVisualizer/gather_terra_data.py @@ -1,10 +1,11 @@ -print("Importing tools for script...") -import sys, os, shutil +import sys +import os +import shutil import json import tarfile import firecloud.api as fapi import pandas as pd from datetime import datetime # Parse cmd args @@ -64,7 +65,7 @@ else: print(f"WARNING: Workflow {wf_json['workflowId']} seems to have failed... Skipping data collection for this run.") else: - print(f"WARNING: Workflow seems to have failed to launch... Skipping data collection for this run.") + print("WARNING: Workflow seems to have failed to launch... 
Skipping data collection for this run.") print('Consolidating files across workflow runs...') # Get list of file names across the different stat categories @@ -97,8 +98,8 @@ with open('./wdl_outputs/README.txt', 'w') as file: lines = [] lines += ['Files in this directory were created using the gather_terra_data.py script provided with the SVisualizer script.\n'] lines += [f'Files copied on: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}\n'] - lines += [f'Taken from:\n'] + lines += ['Taken from:\n'] lines += [f'\tNamespace: {NAMESPACE}\n'] lines += [f'\tWorkspace: {WORKSPACE}\n'] lines += [f'\tSubmission ID: {SUBMISSION_ID}\n'] diff --git a/BenchmarkSVs/SVisualizer/qc_tabs.py b/BenchmarkSVs/SVisualizer/qc_tabs.py index d9fc3d17d..6c5b505b9 100644 --- a/BenchmarkSVs/SVisualizer/qc_tabs.py +++ b/BenchmarkSVs/SVisualizer/qc_tabs.py @@ -165,7 +165,7 @@ def make_hwe_plot(df, interval_name, breakpoint, pct_overlap): ) else: fig.add_annotation( - text=f"No sites fit criteria selected", + text="No sites fit criteria selected", xref="paper", yref="paper", x=0, y=.9, showarrow=False diff --git a/BenchmarkSVs/SVisualizer/truvari_data.py b/BenchmarkSVs/SVisualizer/truvari_data.py index 44736f5bf..f1d1e8adb 100644 --- a/BenchmarkSVs/SVisualizer/truvari_data.py +++ b/BenchmarkSVs/SVisualizer/truvari_data.py @@ -1,6 +1,6 @@ import pandas as pd -from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats +from common_utils import convert_missing_to_pass_filter from user_config import MAKE_MISSING_PASS_FILTER, TRUVARI_DUP_TO_INS diff --git a/BenchmarkSVs/SVisualizer/truvari_tabs.py b/BenchmarkSVs/SVisualizer/truvari_tabs.py index ddb1edf64..323d3299b 100644 --- a/BenchmarkSVs/SVisualizer/truvari_tabs.py +++ b/BenchmarkSVs/SVisualizer/truvari_tabs.py @@ -1,12 +1,11 @@ import pandas as pd import plotly.express as px -import plotly.graph_objects as go import 
quickboard.base as qbb import quickboard.plugins as plg -from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats, sort_svtypes, sort_svlen_bins, sort_overlap_pcts +from common_utils import read_and_postprocess, sort_svtypes, sort_svlen_bins, sort_overlap_pcts from decorators import axes_mode -from plugins import make_type_selector, make_stat_selector, make_length_selector, make_interval_selector, make_axes_mode_selector +from plugins import make_stat_selector, make_interval_selector, make_axes_mode_selector from user_config import COVARIATE_X, EXPERIMENT_ORDER, EXPERIMENT_COLORS, EXPERIMENT_COLOR_DICT, TRUVARI_DUP_TO_INS from truvari_data import postprocess_truvari_bench, postprocess_truvari_closest from upset_plot_utils import create_upset, make_disqualified_df diff --git a/BenchmarkSVs/SVisualizer/upset_plot_utils.py b/BenchmarkSVs/SVisualizer/upset_plot_utils.py index ac7eaaa29..d64e80a78 100644 --- a/BenchmarkSVs/SVisualizer/upset_plot_utils.py +++ b/BenchmarkSVs/SVisualizer/upset_plot_utils.py @@ -2,7 +2,6 @@ import pandas as pd import plotly.express as px import plotly.graph_objects as go -from plotly.subplots import make_subplots def make_disqualified_df(close_df, dist_threshold, size_ratio_threshold, color=None): @@ -510,9 +509,6 @@ def make_margin_plot(self): """ # Group and count according to inputs color = self.color - groups = [x for x in [self.color, self.x] if x is not None] - # if len(groups) > 0: - # counts_df = self.df.groupby(groups).sum().reset_index() if self.color is not None: counts_df = self.df.groupby(self.color).sum().reset_index() if self.x is not None: diff --git a/BenchmarkSVs/SVisualizer/wittyer_tabs.py b/BenchmarkSVs/SVisualizer/wittyer_tabs.py index c6b41850f..7718c5783 100644 --- a/BenchmarkSVs/SVisualizer/wittyer_tabs.py +++ b/BenchmarkSVs/SVisualizer/wittyer_tabs.py @@ -2,6 +2,8 @@ import plotly.express as px import quickboard.base as qbb import quickboard.plugins as plg +import numpy as np 
+import plotly.graph_objects as go from common_utils import read_and_postprocess, add_bbend_stats from plugins import make_interval_plugin_bundle, make_type_selector, make_filter_selector, make_stat_selector, make_axes_mode_selector diff --git a/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb b/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb index 8ee1abedd..74d3be534 100644 --- a/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb +++ b/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb @@ -8,7 +8,6 @@ "outputs": [], "source": [ "import pandas as pd\n", - "import numpy as np\n", "import plotly.express as px\n", "\n", "import quickboard.base as qbb\n", @@ -376,14 +375,14 @@ " if not SINGLE_SAMPLE_MODE:\n", " marginal = marginal.lower() if marginal != 'None' else None\n", " fig = px.scatter(df, x='Recall', y='Precision', color=cfg.color, marginal_x=marginal, marginal_y=marginal,\n", - " hover_data=['Query_Name'], title=cfg.make_title(prefix=f'Precision vs Recall Plot'), \n", + " hover_data=['Query_Name'], title=cfg.make_title(prefix='Precision vs Recall Plot'), \n", " category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n", " if axes_mode == 'Fixed':\n", " fig.update_layout(xaxis_range=[0, 1.1], yaxis_range=[0, 1.1])\n", " else:\n", " melted_df = df.melt(id_vars=['Experiment', 'Query_Name', 'Base_Name', 'Interval', 'Type'], value_vars=['Precision', 'Recall', 'F1_Score'])\n", " melted_df = melted_df.rename(columns={'variable': 'Stat', 'value': 'Value'})\n", - " fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix=f'Performance Stats'), \n", + " fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix='Performance Stats'), \n", " category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n", " fig.update_layout(yaxis_range=[0, 1.1])\n", " \n", @@ -451,7 +450,7 @@ " try: \n", " assert isinstance(COVARIATE_X, list)\n", " correlators = COVARIATE_X\n", - " stat_corr_plugins += [\n", + " 
stat_covariate_plugins += [\n", " plg.PlotInputRadioButtons(\n", " header='x-axis Covariate to Plot',\n", " plot_input='covariate',\n", @@ -528,7 +527,7 @@ " category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n", " else:\n", " fig = px.line(df, x='Recall', y='Precision', color=cfg.color, line_group='Query_Name', hover_data=['Score'],\n", - " title=cfg.make_title(f'ROC Plot') + ' stratified by Score')\n", + " title=cfg.make_title('ROC Plot') + ' stratified by Score')\n", " fig.update_layout(xaxis_range=[0, 1.1])\n", "\n", " if axes_mode == 'Fixed':\n", diff --git a/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py b/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py index fe5c0bb10..9535f44f6 100644 --- a/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py +++ b/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py @@ -1 +1,3 @@ from .LiftoverSites import LiftoverSites + +__all__ = ["LiftoverSites"] \ No newline at end of file diff --git a/ImputationPipeline/ScoreBGE/ScoreBGE.py b/ImputationPipeline/ScoreBGE/ScoreBGE.py index 5c1b6bad4..ba8d34add 100644 --- a/ImputationPipeline/ScoreBGE/ScoreBGE.py +++ b/ImputationPipeline/ScoreBGE/ScoreBGE.py @@ -99,7 +99,7 @@ def _print_wes_gvcf_metrics(self): num_low_quality_sites = {sample_name: len(self.gvcf_low_quality_sites[sample_name]) for sample_name in self.sample_names} num_low_quality_sites_min_max = min(num_low_quality_sites.values()), max(num_low_quality_sites.values()) - print(f' Metrics:') + print(' Metrics:') print(f' Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}') print(f' Low quality sites: Min: {num_low_quality_sites_min_max[0]} Max: {num_low_quality_sites_min_max[1]}') @@ -107,7 +107,7 @@ def _print_wgs_vcf_metrics(self): num_sites_scored = {sample_name: len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names} num_sites_scored_min_max = 
min(num_sites_scored.values()), max(num_sites_scored.values()) - print(f' Metrics:') + print(' Metrics:') print(f' Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}') print(f' Sites not found: {self.vcf_num_sites_not_found}') @@ -115,7 +115,7 @@ def _print_wes_and_wgs_metrics(self): total_sites_scored = {sample_name: len(self.gvcf_sites_scored[sample_name]) + len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names} sites_scored_min_max = min(total_sites_scored.values()), max(total_sites_scored.values()) - print(f'WES GVCF + WGS VCF Scoring:') + print('WES GVCF + WGS VCF Scoring:') print(f' Total sites scored: Min: {sites_scored_min_max[0]} Max: {sites_scored_min_max[1]}') def _process_weight_wes(self, weight, gvcf, site_gq_threshold, out_sites_scored): diff --git a/ImputationPipeline/ScoreBGE/requirements.txt b/ImputationPipeline/ScoreBGE/requirements.txt new file mode 100644 index 000000000..fb1c0a5a1 --- /dev/null +++ b/ImputationPipeline/ScoreBGE/requirements.txt @@ -0,0 +1,3 @@ +pysam==0.20.0 +pandas==1.3.4 +numpy==1.21.4 \ No newline at end of file diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py index b3c13d64a..095de70e1 100644 --- a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py +++ b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py @@ -1,8 +1,6 @@ import argparse import matplotlib.pyplot as plt -import pandas as pd import seaborn as sns -import sys parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction analysis summary.") parser.add_argument("-i", "--input", required = True) @@ -47,7 +45,7 @@ ax[1].bar(tools, precision_list, color = colors[1:7]) ax[2].bar(tools, f1_score_list, color = colors[1:7]) -if args.save == True: +if args.save: plt.savefig(args.dataset_name + "_analysis_summary_" + args.type + ".png") else: 
plt.show() diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py index c4e27eb00..92dea7c25 100644 --- a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py +++ b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py @@ -1,9 +1,7 @@ import argparse import matplotlib.pyplot as plt import numpy as np -import pandas as pd import seaborn as sns -import sys parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction denovo analysis statistics.") parser.add_argument("-i", "--input", required = True) @@ -59,7 +57,7 @@ ax.set_xticks(x + width, tools) ax.legend(loc = "upper left", ncols = len(tools)) -if args.save == True: +if args.save: plt.savefig(args.dataset_name + "_analysis_summary_denovo_" + args.type + ".png") else: plt.show() diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py index a8be259aa..0f045bf0b 100644 --- a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py +++ b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py @@ -1,5 +1,4 @@ import sys -import os import argparse import math diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py index a8067eb2c..0ea403a9e 100644 --- a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py +++ b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py @@ -1,5 +1,4 @@ import sys -import os import argparse # reliable: all methods detected diff --git a/MultiQC_Terra/MultiQC.ipynb b/MultiQC_Terra/MultiQC.ipynb index 5313b0b68..f54ce6030 100644 --- a/MultiQC_Terra/MultiQC.ipynb +++ b/MultiQC_Terra/MultiQC.ipynb @@ -24,8 +24,8 @@ "metadata": {}, 
"outputs": [], "source": [ - "# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n", "# import os\n", + "# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n", "# os._exit(00)" ] }, @@ -71,12 +71,13 @@ "from multiqc.utils import report, config\n", "import os\n", "from google.cloud import storage\n", - "storage_client = storage.Client()\n", - "import subprocess\n", "import pathlib\n", "import pytz\n", "from datetime import datetime\n", "\n", + "storage_client = storage.Client()\n", + "\n", + "\n", "config.mqc_load_userconfig()\n", "\n", "def flat(pool):\n", @@ -112,7 +113,7 @@ " workflow_with_submissions = [id_tuple for id_tuples in await asyncio.gather(*tasks) for id_tuple in id_tuples]\n", "\n", "if worklow_ids_to_include is not None:\n", - " workflow_with_submissions = [e for e in workflow_with_sumbissions if e[1] in worklow_ids_to_include]\n", + " workflow_with_submissions = [e for e in workflow_with_submissions if e[1] in worklow_ids_to_include]\n", "print(f'Found {len(workflow_with_submissions)} workflows.')\n", "\n", "\n", @@ -161,7 +162,7 @@ "print(f'Workflows found: {workflow_names}')\n", "\n", "# remove duplicates\n", - "print(f'Removing duplicates/reruns...')\n", + "print('Removing duplicates/reruns...')\n", "metrics_file_groups = defaultdict(list)\n", "for wn, on, st in metrics_file_paths:\n", " metrics_file_groups[(wn, os.path.basename(on))].append((wn, on, st))\n", @@ -240,4 +241,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/Utilities/Dockers/Alphashape/generate_alphashape.py b/Utilities/Dockers/Alphashape/generate_alphashape.py index 268f48007..251ac17ac 100644 --- a/Utilities/Dockers/Alphashape/generate_alphashape.py +++ b/Utilities/Dockers/Alphashape/generate_alphashape.py @@ -1,7 +1,5 @@ import argparse import pickle -import sys -import numpy as np import pandas as pd import alphashape diff --git a/Utilities/Dockers/Alphashape/pca_novelty_detection.py 
b/Utilities/Dockers/Alphashape/pca_novelty_detection.py index 91ae4dd5a..a3d154aeb 100644 --- a/Utilities/Dockers/Alphashape/pca_novelty_detection.py +++ b/Utilities/Dockers/Alphashape/pca_novelty_detection.py @@ -1,13 +1,9 @@ import argparse import pickle -import sys -import numpy as np import pandas as pd import matplotlib.pyplot as plt -import matplotlib.patches as mpatches from descartes import PolygonPatch from shapely.geometry import Point -import alphashape # Parse command-line arguments parser = argparse.ArgumentParser(description = "Automatically flag novelties in 2D PCA plots using Concave Hulls generated via alphashapes.") diff --git a/test/choose_watt_tests/choose_watt_tests.py b/test/choose_watt_tests/choose_watt_tests.py index 8e89dbfab..d4bb1eda1 100644 --- a/test/choose_watt_tests/choose_watt_tests.py +++ b/test/choose_watt_tests/choose_watt_tests.py @@ -33,8 +33,7 @@ def resolve_relative_path(rel_path: str) -> str: return rel_path def get_wdl_dependencies(womtool_run: subprocess.CompletedProcess): - womtool_stdout = womtool_run.stdout - return [l.decode() for l in womtool_run.stdout.splitlines() if l.endswith(b'.wdl')] + return [line.decode() for line in womtool_run.stdout.splitlines() if line.endswith(b'.wdl')] if __name__ == '__main__': args = parser.parse_args()