Python testing and linting (#191)

broadinstitute · Feb 7, 2025 · 6b5f1a6 · 6b5f1a6
1 parent bff59b5
commit 6b5f1a6
Show file tree

Hide file tree

Showing 19 changed files with 73 additions and 50 deletions.
diff --git a/.github/workflows/run_tests.yaml b/.github/workflows/run_tests.yaml
@@ -1,4 +1,4 @@
-name: WDL Tests
+name: Tests
 on: 
   push:
     branches:
@@ -10,7 +10,7 @@ concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
   cancel-in-progress: true
 jobs:
-  validate:
+  validate_wdl:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -20,8 +20,8 @@ jobs:
       - name: Womtool Validate
         run: find . -name '*.wdl' | xargs -tI {} java -jar ${WOMTOOL_JAR} validate {}
 
-  test:
-    needs: validate
+  test_wdl:
+    needs: validate_wdl
     runs-on: ubuntu-latest
     permissions:
       contents: 'read'
@@ -77,4 +77,38 @@ jobs:
         if: always()
         with:
           name: cromwell_logs
-          path: cromwell_logs/
+          path: cromwell_logs/
+
+  python_lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+      - name: Lint with Ruff
+        run: |
+          pip install ruff
+          ruff check --output-format=github .
+
+  python_test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+      - name: ScoreBGE Unit tests
+        run: |
+          pip install -r ImputationPipeline/ScoreBGE/requirements.txt
+          cd ImputationPipeline/ScoreBGE/tests
+          export PYTHONPATH=$GITHUB_WORKSPACE
+          python -m unittest test_ScoreBGE.py
diff --git a/BenchmarkSVs/SVisualizer/gather_terra_data.py b/BenchmarkSVs/SVisualizer/gather_terra_data.py
@@ -1,10 +1,11 @@
-print("Importing tools for script...")
-import sys, os, shutil
+import sys
+import os
+import shutil
 import json
 import tarfile
 import firecloud.api as fapi
 import pandas as pd
 from datetime import datetime
 
 
 # Parse cmd args
@@ -64,7 +65,7 @@
         else:
             print(f"WARNING: Workflow {wf_json['workflowId']} seems to have failed... Skipping data collection for this run.")
     else:
-        print(f"WARNING: Workflow seems to have failed to launch... Skipping data collection for this run.")
+        print("WARNING: Workflow seems to have failed to launch... Skipping data collection for this run.")
 
 print('Consolidating files across workflow runs...')
 # Get list of file names across the different stat categories
@@ -97,8 +98,8 @@
 with open('./wdl_outputs/README.txt', 'w') as file:
     lines = []
     lines += ['Files in this directory were created using the gather_terra_data.py script provided with the SVisualizer script.\n']
     lines += [f'Files copied on: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}\n']
-    lines += [f'Taken from:\n']
+    lines += ['Taken from:\n']
     lines += [f'\tNamespace: {NAMESPACE}\n']
     lines += [f'\tWorkspace: {WORKSPACE}\n']
     lines += [f'\tSubmission ID: {SUBMISSION_ID}\n']

diff --git a/BenchmarkSVs/SVisualizer/qc_tabs.py b/BenchmarkSVs/SVisualizer/qc_tabs.py
@@ -165,7 +165,7 @@ def make_hwe_plot(df, interval_name, breakpoint, pct_overlap):
         )
     else:
         fig.add_annotation(
-            text=f"No sites fit criteria selected",
+            text="No sites fit criteria selected",
             xref="paper", yref="paper",
             x=0, y=.9,
             showarrow=False

diff --git a/BenchmarkSVs/SVisualizer/truvari_data.py b/BenchmarkSVs/SVisualizer/truvari_data.py
@@ -1,6 +1,6 @@
 import pandas as pd
 
-from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats
+from common_utils import convert_missing_to_pass_filter
 from user_config import MAKE_MISSING_PASS_FILTER, TRUVARI_DUP_TO_INS
 
 

diff --git a/BenchmarkSVs/SVisualizer/truvari_tabs.py b/BenchmarkSVs/SVisualizer/truvari_tabs.py
@@ -1,12 +1,11 @@
 import pandas as pd
 import plotly.express as px
-import plotly.graph_objects as go
 import quickboard.base as qbb
 import quickboard.plugins as plg
 
-from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats, sort_svtypes, sort_svlen_bins, sort_overlap_pcts
+from common_utils import read_and_postprocess, sort_svtypes, sort_svlen_bins, sort_overlap_pcts
 from decorators import axes_mode
-from plugins import make_type_selector, make_stat_selector, make_length_selector, make_interval_selector, make_axes_mode_selector
+from plugins import make_stat_selector, make_interval_selector, make_axes_mode_selector
 from user_config import COVARIATE_X, EXPERIMENT_ORDER, EXPERIMENT_COLORS, EXPERIMENT_COLOR_DICT, TRUVARI_DUP_TO_INS
 from truvari_data import postprocess_truvari_bench, postprocess_truvari_closest
 from upset_plot_utils import create_upset, make_disqualified_df

diff --git a/BenchmarkSVs/SVisualizer/upset_plot_utils.py b/BenchmarkSVs/SVisualizer/upset_plot_utils.py
@@ -2,7 +2,6 @@
 import pandas as pd
 import plotly.express as px
 import plotly.graph_objects as go
-from plotly.subplots import make_subplots
 
 
 def make_disqualified_df(close_df, dist_threshold, size_ratio_threshold, color=None):
@@ -510,9 +509,6 @@ def make_margin_plot(self):
         """
         # Group and count according to inputs
         color = self.color
-        groups = [x for x in [self.color, self.x] if x is not None]
-        # if len(groups) > 0:
-        #     counts_df = self.df.groupby(groups).sum().reset_index()
         if self.color is not None:
             counts_df = self.df.groupby(self.color).sum().reset_index()
             if self.x is not None:

diff --git a/BenchmarkSVs/SVisualizer/wittyer_tabs.py b/BenchmarkSVs/SVisualizer/wittyer_tabs.py
@@ -2,6 +2,8 @@
 import plotly.express as px
 import quickboard.base as qbb
 import quickboard.plugins as plg
+import numpy as np
+import plotly.graph_objects as go
 
 from common_utils import read_and_postprocess, add_bbend_stats
 from plugins import make_interval_plugin_bundle, make_type_selector, make_filter_selector, make_stat_selector, make_axes_mode_selector

diff --git a/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb b/BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb
@@ -8,7 +8,6 @@
    "outputs": [],
    "source": [
     "import pandas as pd\n",
-    "import numpy as np\n",
     "import plotly.express as px\n",
     "\n",
     "import quickboard.base as qbb\n",
@@ -376,14 +375,14 @@
     "    if not SINGLE_SAMPLE_MODE:\n",
     "        marginal = marginal.lower() if marginal != 'None' else None\n",
     "        fig = px.scatter(df, x='Recall', y='Precision', color=cfg.color, marginal_x=marginal, marginal_y=marginal,\n",
-    "                          hover_data=['Query_Name'], title=cfg.make_title(prefix=f'Precision vs Recall Plot'), \n",
+    "                          hover_data=['Query_Name'], title=cfg.make_title(prefix='Precision vs Recall Plot'), \n",
     "                          category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
     "        if axes_mode == 'Fixed':\n",
     "            fig.update_layout(xaxis_range=[0, 1.1], yaxis_range=[0, 1.1])\n",
     "    else:\n",
     "        melted_df = df.melt(id_vars=['Experiment', 'Query_Name', 'Base_Name', 'Interval', 'Type'], value_vars=['Precision', 'Recall', 'F1_Score'])\n",
     "        melted_df = melted_df.rename(columns={'variable': 'Stat', 'value': 'Value'})\n",
-    "        fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix=f'Performance Stats'), \n",
+    "        fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix='Performance Stats'), \n",
     "                     category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
     "        fig.update_layout(yaxis_range=[0, 1.1])\n",
     "    \n",
@@ -451,7 +450,7 @@
     "        try: \n",
     "            assert isinstance(COVARIATE_X, list)\n",
     "            correlators = COVARIATE_X\n",
-    "            stat_corr_plugins += [\n",
+    "            stat_covariate_plugins += [\n",
     "                plg.PlotInputRadioButtons(\n",
     "                    header='x-axis Covariate to Plot',\n",
     "                    plot_input='covariate',\n",
@@ -528,7 +527,7 @@
     "                      category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
     "    else:\n",
     "        fig = px.line(df, x='Recall', y='Precision', color=cfg.color, line_group='Query_Name', hover_data=['Score'],\n",
-    "                      title=cfg.make_title(f'ROC Plot') + ' stratified by Score')\n",
+    "                      title=cfg.make_title('ROC Plot') + ' stratified by Score')\n",
     "        fig.update_layout(xaxis_range=[0, 1.1])\n",
     "\n",
     "    if axes_mode == 'Fixed':\n",

diff --git a/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py b/ImputationPipeline/Liftover/LiftoverSites/LiftoverSites/__init__.py
@@ -1 +1,3 @@
 from .LiftoverSites import LiftoverSites
+
+__all__ = ["LiftoverSites"]
diff --git a/ImputationPipeline/ScoreBGE/ScoreBGE.py b/ImputationPipeline/ScoreBGE/ScoreBGE.py
@@ -99,23 +99,23 @@ def _print_wes_gvcf_metrics(self):
         num_low_quality_sites = {sample_name: len(self.gvcf_low_quality_sites[sample_name]) for sample_name in self.sample_names}
         num_low_quality_sites_min_max = min(num_low_quality_sites.values()), max(num_low_quality_sites.values())
 
-        print(f'  Metrics:')
+        print('  Metrics:')
         print(f'    Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}')
         print(f'    Low quality sites: Min: {num_low_quality_sites_min_max[0]} Max: {num_low_quality_sites_min_max[1]}')
 
     def _print_wgs_vcf_metrics(self):
         num_sites_scored = {sample_name: len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names}
         num_sites_scored_min_max = min(num_sites_scored.values()), max(num_sites_scored.values())
 
-        print(f'  Metrics:')
+        print('  Metrics:')
         print(f'    Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}')
         print(f'    Sites not found: {self.vcf_num_sites_not_found}')
 
     def _print_wes_and_wgs_metrics(self):
         total_sites_scored = {sample_name: len(self.gvcf_sites_scored[sample_name]) + len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names}
         sites_scored_min_max = min(total_sites_scored.values()), max(total_sites_scored.values())
 
-        print(f'WES GVCF + WGS VCF Scoring:')
+        print('WES GVCF + WGS VCF Scoring:')
         print(f'    Total sites scored: Min: {sites_scored_min_max[0]} Max: {sites_scored_min_max[1]}')
 
     def _process_weight_wes(self, weight, gvcf, site_gq_threshold, out_sites_scored):

diff --git a/ImputationPipeline/ScoreBGE/requirements.txt b/ImputationPipeline/ScoreBGE/requirements.txt
@@ -0,0 +1,3 @@
+pysam==0.20.0
+pandas==1.3.4
+numpy==1.21.4
diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_analysis_summary.py
@@ -1,8 +1,6 @@
 import argparse
 import matplotlib.pyplot as plt
-import pandas as pd
 import seaborn as sns
-import sys
 
 parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction analysis summary.")
 parser.add_argument("-i", "--input", required = True)
@@ -47,7 +45,7 @@
 ax[1].bar(tools, precision_list, color = colors[1:7])
 ax[2].bar(tools, f1_score_list, color = colors[1:7])
 
-if args.save == True:
+if args.save:
 	plt.savefig(args.dataset_name + "_analysis_summary_" + args.type + ".png")
 else:
 	plt.show()
diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py b/LongReadRNABenchmark/lr_isoform_custom_docker/plot_denovo_analysis_summary.py
@@ -1,9 +1,7 @@
 import argparse
 import matplotlib.pyplot as plt
 import numpy as np
-import pandas as pd
 import seaborn as sns
-import sys
 
 parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction denovo analysis statistics.")
 parser.add_argument("-i", "--input", required = True)
@@ -59,7 +57,7 @@
 ax.set_xticks(x + width, tools)
 ax.legend(loc = "upper left", ncols = len(tools))
 
-if args.save == True:
+if args.save:
 	plt.savefig(args.dataset_name + "_analysis_summary_denovo_" + args.type + ".png")
 else:
 	plt.show()
diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_analysis.py
@@ -1,5 +1,4 @@
 import sys
-import os
 import argparse
 import math
 

diff --git a/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py b/LongReadRNABenchmark/lr_isoform_custom_docker/summarize_denovo_analysis.py
@@ -1,5 +1,4 @@
 import sys
-import os
 import argparse
 
 # reliable: all methods detected

diff --git a/MultiQC_Terra/MultiQC.ipynb b/MultiQC_Terra/MultiQC.ipynb
@@ -24,8 +24,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n",
     "# import os\n",
+    "# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n",
     "# os._exit(00)"
    ]
   },
@@ -71,12 +71,13 @@
     "from multiqc.utils import report, config\n",
     "import os\n",
     "from google.cloud import storage\n",
-    "storage_client = storage.Client()\n",
-    "import subprocess\n",
     "import pathlib\n",
     "import pytz\n",
     "from datetime import datetime\n",
     "\n",
+    "storage_client = storage.Client()\n",
+    "\n",
+    "\n",
     "config.mqc_load_userconfig()\n",
     "\n",
     "def flat(pool):\n",
@@ -112,7 +113,7 @@
     "    workflow_with_submissions = [id_tuple for id_tuples in await asyncio.gather(*tasks) for id_tuple in id_tuples]\n",
     "\n",
     "if worklow_ids_to_include is not None:\n",
-    "    workflow_with_submissions = [e for e in workflow_with_sumbissions if e[1] in worklow_ids_to_include]\n",
+    "    workflow_with_submissions = [e for e in workflow_with_submissions if e[1] in worklow_ids_to_include]\n",
     "print(f'Found {len(workflow_with_submissions)} workflows.')\n",
     "\n",
     "\n",
@@ -161,7 +162,7 @@
     "print(f'Workflows found: {workflow_names}')\n",
     "\n",
     "# remove duplicates\n",
-    "print(f'Removing duplicates/reruns...')\n",
+    "print('Removing duplicates/reruns...')\n",
     "metrics_file_groups = defaultdict(list)\n",
     "for wn, on, st in metrics_file_paths:\n",
     "    metrics_file_groups[(wn, os.path.basename(on))].append((wn, on, st))\n",
@@ -240,4 +241,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
diff --git a/Utilities/Dockers/Alphashape/generate_alphashape.py b/Utilities/Dockers/Alphashape/generate_alphashape.py
@@ -1,7 +1,5 @@
 import argparse
 import pickle
-import sys
-import numpy as np
 import pandas as pd
 import alphashape
 

diff --git a/Utilities/Dockers/Alphashape/pca_novelty_detection.py b/Utilities/Dockers/Alphashape/pca_novelty_detection.py
@@ -1,13 +1,9 @@
 import argparse
 import pickle
-import sys
-import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
-import matplotlib.patches as mpatches
 from descartes import PolygonPatch
 from shapely.geometry import Point
-import alphashape
 
 # Parse command-line arguments
 parser = argparse.ArgumentParser(description = "Automatically flag novelties in 2D PCA plots using Concave Hulls generated via alphashapes.")

diff --git a/test/choose_watt_tests/choose_watt_tests.py b/test/choose_watt_tests/choose_watt_tests.py
@@ -33,8 +33,7 @@ def resolve_relative_path(rel_path: str) -> str:
         return rel_path
 
 def get_wdl_dependencies(womtool_run: subprocess.CompletedProcess):
-    womtool_stdout = womtool_run.stdout
-    return [l.decode() for l in womtool_run.stdout.splitlines() if l.endswith(b'.wdl')]
+    return [line.decode() for line in womtool_run.stdout.splitlines() if line.endswith(b'.wdl')]
 
 if __name__ == '__main__':
     args = parser.parse_args()
Original file line number	Diff line number	Diff line change
		@@ -1 +1,3 @@
		from .LiftoverSites import LiftoverSites

		__all__ = ["LiftoverSites"]