Skip to content

Commit

Permalink
Python testing and linting (#191)
Browse files Browse the repository at this point in the history
  • Loading branch information
kachulis authored Feb 7, 2025
1 parent bff59b5 commit 6b5f1a6
Show file tree
Hide file tree
Showing 19 changed files with 73 additions and 50 deletions.
44 changes: 39 additions & 5 deletions .github/workflows/run_tests.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: WDL Tests
name: Tests
on:
push:
branches:
Expand All @@ -10,7 +10,7 @@ concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
validate:
validate_wdl:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -20,8 +20,8 @@ jobs:
- name: Womtool Validate
run: find . -name '*.wdl' | xargs -tI {} java -jar ${WOMTOOL_JAR} validate {}

test:
needs: validate
test_wdl:
needs: validate_wdl
runs-on: ubuntu-latest
permissions:
contents: 'read'
Expand Down Expand Up @@ -77,4 +77,38 @@ jobs:
if: always()
with:
name: cromwell_logs
path: cromwell_logs/
path: cromwell_logs/

# CI job: lint the repository's Python sources with Ruff on Python 3.9.
# The "Install dependencies" step only upgrades pip; Ruff itself is installed
# (unpinned) inside the lint step, so findings may change as new Ruff versions
# are released.
# `--output-format=github` makes Ruff report findings as GitHub Actions
# annotations; `.` lints everything under the checkout root.
python_lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- name: Lint with Ruff
run: |
pip install ruff
ruff check --output-format=github .
# CI job: run the ScoreBGE unit tests on Python 3.9.
# Test dependencies are installed from ImputationPipeline/ScoreBGE/requirements.txt.
# The tests run from the tests directory with PYTHONPATH set to the checkout
# root ($GITHUB_WORKSPACE) -- presumably so test_ScoreBGE.py can import the code
# under test by its repository path; confirm against the test module's imports.
python_test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.9'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- name: ScoreBGE Unit tests
run: |
pip install -r ImputationPipeline/ScoreBGE/requirements.txt
cd ImputationPipeline/ScoreBGE/tests
export PYTHONPATH=$GITHUB_WORKSPACE
python -m unittest test_ScoreBGE.py
12 changes: 6 additions & 6 deletions BenchmarkSVs/SVisualizer/gather_terra_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
print("Importing tools for script...")
import sys, os, shutil
import sys
import os
import shutil
import json
import tarfile
import firecloud.api as fapi
import pandas as pd
from datetime import datetime


# Parse cmd args
Expand Down Expand Up @@ -64,7 +64,7 @@
else:
print(f"WARNING: Workflow {wf_json['workflowId']} seems to have failed... Skipping data collection for this run.")
else:
print(f"WARNING: Workflow seems to have failed to launch... Skipping data collection for this run.")
print("WARNING: Workflow seems to have failed to launch... Skipping data collection for this run.")

print('Consolidating files across workflow runs...')
# Get list of file names across the different stat categories
Expand Down Expand Up @@ -97,8 +97,8 @@
with open('./wdl_outputs/README.txt', 'w') as file:
lines = []
lines += ['Files in this directory were created using the gather_terra_data.py script provided with the SVisualizer script.\n']
lines += [f'Files copied on: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}\n']
lines += [f'Taken from:\n']
# Local import: the top-of-file `from datetime import datetime` appears to be
# removed by this change, yet this line still needs it.
from datetime import datetime
# The f-prefix is required here: without it the README receives the literal
# text "{datetime.now()...}" instead of the copy timestamp.
lines += [f'Files copied on: {datetime.now().strftime("%d/%m/%Y %H:%M:%S")}\n']
lines += ['Taken from:\n']
lines += [f'\tNamespace: {NAMESPACE}\n']
lines += [f'\tWorkspace: {WORKSPACE}\n']
lines += [f'\tSubmission ID: {SUBMISSION_ID}\n']
Expand Down
2 changes: 1 addition & 1 deletion BenchmarkSVs/SVisualizer/qc_tabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def make_hwe_plot(df, interval_name, breakpoint, pct_overlap):
)
else:
fig.add_annotation(
text=f"No sites fit criteria selected",
text="No sites fit criteria selected",
xref="paper", yref="paper",
x=0, y=.9,
showarrow=False
Expand Down
2 changes: 1 addition & 1 deletion BenchmarkSVs/SVisualizer/truvari_data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pandas as pd

from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats
from common_utils import convert_missing_to_pass_filter
from user_config import MAKE_MISSING_PASS_FILTER, TRUVARI_DUP_TO_INS


Expand Down
5 changes: 2 additions & 3 deletions BenchmarkSVs/SVisualizer/truvari_tabs.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import quickboard.base as qbb
import quickboard.plugins as plg

from common_utils import read_and_postprocess, convert_missing_to_pass_filter, add_bbend_stats, sort_svtypes, sort_svlen_bins, sort_overlap_pcts
from common_utils import read_and_postprocess, sort_svtypes, sort_svlen_bins, sort_overlap_pcts
from decorators import axes_mode
from plugins import make_type_selector, make_stat_selector, make_length_selector, make_interval_selector, make_axes_mode_selector
from plugins import make_stat_selector, make_interval_selector, make_axes_mode_selector
from user_config import COVARIATE_X, EXPERIMENT_ORDER, EXPERIMENT_COLORS, EXPERIMENT_COLOR_DICT, TRUVARI_DUP_TO_INS
from truvari_data import postprocess_truvari_bench, postprocess_truvari_closest
from upset_plot_utils import create_upset, make_disqualified_df
Expand Down
4 changes: 0 additions & 4 deletions BenchmarkSVs/SVisualizer/upset_plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots


def make_disqualified_df(close_df, dist_threshold, size_ratio_threshold, color=None):
Expand Down Expand Up @@ -510,9 +509,6 @@ def make_margin_plot(self):
"""
# Group and count according to inputs
color = self.color
groups = [x for x in [self.color, self.x] if x is not None]
# if len(groups) > 0:
# counts_df = self.df.groupby(groups).sum().reset_index()
if self.color is not None:
counts_df = self.df.groupby(self.color).sum().reset_index()
if self.x is not None:
Expand Down
2 changes: 2 additions & 0 deletions BenchmarkSVs/SVisualizer/wittyer_tabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import plotly.express as px
import quickboard.base as qbb
import quickboard.plugins as plg
import numpy as np
import plotly.graph_objects as go

from common_utils import read_and_postprocess, add_bbend_stats
from plugins import make_interval_plugin_bundle, make_type_selector, make_filter_selector, make_stat_selector, make_axes_mode_selector
Expand Down
9 changes: 4 additions & 5 deletions BenchmarkVCFs/BenchmarkBoard/BenchmarkBoard.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.express as px\n",
"\n",
"import quickboard.base as qbb\n",
Expand Down Expand Up @@ -376,14 +375,14 @@
" if not SINGLE_SAMPLE_MODE:\n",
" marginal = marginal.lower() if marginal != 'None' else None\n",
" fig = px.scatter(df, x='Recall', y='Precision', color=cfg.color, marginal_x=marginal, marginal_y=marginal,\n",
" hover_data=['Query_Name'], title=cfg.make_title(prefix=f'Precision vs Recall Plot'), \n",
" hover_data=['Query_Name'], title=cfg.make_title(prefix='Precision vs Recall Plot'), \n",
" category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
" if axes_mode == 'Fixed':\n",
" fig.update_layout(xaxis_range=[0, 1.1], yaxis_range=[0, 1.1])\n",
" else:\n",
" melted_df = df.melt(id_vars=['Experiment', 'Query_Name', 'Base_Name', 'Interval', 'Type'], value_vars=['Precision', 'Recall', 'F1_Score'])\n",
" melted_df = melted_df.rename(columns={'variable': 'Stat', 'value': 'Value'})\n",
" fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix=f'Performance Stats'), \n",
" fig = px.bar(melted_df, x='Stat', y='Value', title=cfg.make_title(prefix='Performance Stats'), \n",
" category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
" fig.update_layout(yaxis_range=[0, 1.1])\n",
" \n",
Expand Down Expand Up @@ -451,7 +450,7 @@
" try: \n",
" assert isinstance(COVARIATE_X, list)\n",
" correlators = COVARIATE_X\n",
" stat_corr_plugins += [\n",
" stat_covariate_plugins += [\n",
" plg.PlotInputRadioButtons(\n",
" header='x-axis Covariate to Plot',\n",
" plot_input='covariate',\n",
Expand Down Expand Up @@ -528,7 +527,7 @@
" category_orders=CATEGORY_ORDERS, color_discrete_map=EXPERIMENT_COLOR_MAP)\n",
" else:\n",
" fig = px.line(df, x='Recall', y='Precision', color=cfg.color, line_group='Query_Name', hover_data=['Score'],\n",
" title=cfg.make_title(f'ROC Plot') + ' stratified by Score')\n",
" title=cfg.make_title('ROC Plot') + ' stratified by Score')\n",
" fig.update_layout(xaxis_range=[0, 1.1])\n",
"\n",
" if axes_mode == 'Fixed':\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
from .LiftoverSites import LiftoverSites

__all__ = ["LiftoverSites"]
6 changes: 3 additions & 3 deletions ImputationPipeline/ScoreBGE/ScoreBGE.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,23 +99,23 @@ def _print_wes_gvcf_metrics(self):
num_low_quality_sites = {sample_name: len(self.gvcf_low_quality_sites[sample_name]) for sample_name in self.sample_names}
num_low_quality_sites_min_max = min(num_low_quality_sites.values()), max(num_low_quality_sites.values())

print(f' Metrics:')
print(' Metrics:')
print(f' Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}')
print(f' Low quality sites: Min: {num_low_quality_sites_min_max[0]} Max: {num_low_quality_sites_min_max[1]}')

def _print_wgs_vcf_metrics(self):
num_sites_scored = {sample_name: len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names}
num_sites_scored_min_max = min(num_sites_scored.values()), max(num_sites_scored.values())

print(f' Metrics:')
print(' Metrics:')
print(f' Sites scored: Min: {num_sites_scored_min_max[0]} Max: {num_sites_scored_min_max[1]}')
print(f' Sites not found: {self.vcf_num_sites_not_found}')

def _print_wes_and_wgs_metrics(self):
total_sites_scored = {sample_name: len(self.gvcf_sites_scored[sample_name]) + len(self.vcf_sites_scored[sample_name]) for sample_name in self.sample_names}
sites_scored_min_max = min(total_sites_scored.values()), max(total_sites_scored.values())

print(f'WES GVCF + WGS VCF Scoring:')
print('WES GVCF + WGS VCF Scoring:')
print(f' Total sites scored: Min: {sites_scored_min_max[0]} Max: {sites_scored_min_max[1]}')

def _process_weight_wes(self, weight, gvcf, site_gq_threshold, out_sites_scored):
Expand Down
3 changes: 3 additions & 0 deletions ImputationPipeline/ScoreBGE/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pysam==0.20.0
pandas==1.3.4
numpy==1.21.4
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import argparse
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import sys

parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction analysis summary.")
parser.add_argument("-i", "--input", required = True)
Expand Down Expand Up @@ -47,7 +45,7 @@
ax[1].bar(tools, precision_list, color = colors[1:7])
ax[2].bar(tools, f1_score_list, color = colors[1:7])

if args.save == True:
if args.save:
plt.savefig(args.dataset_name + "_analysis_summary_" + args.type + ".png")
else:
plt.show()
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import argparse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sys

parser = argparse.ArgumentParser(description = "Generate plot for the isoform reconstruction denovo analysis statistics.")
parser.add_argument("-i", "--input", required = True)
Expand Down Expand Up @@ -59,7 +57,7 @@
ax.set_xticks(x + width, tools)
ax.legend(loc = "upper left", ncols = len(tools))

if args.save == True:
if args.save:
plt.savefig(args.dataset_name + "_analysis_summary_denovo_" + args.type + ".png")
else:
plt.show()
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sys
import os
import argparse
import math

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import sys
import os
import argparse

# reliable: all methods detected
Expand Down
13 changes: 7 additions & 6 deletions MultiQC_Terra/MultiQC.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"metadata": {},
"outputs": [],
"source": [
"# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n",
"# import os\n",
"# !pip install git+https://github.com/kachulis/MultiQC.git@ck_gcp\n",
"# os._exit(00)"
]
},
Expand Down Expand Up @@ -71,12 +71,13 @@
"from multiqc.utils import report, config\n",
"import os\n",
"from google.cloud import storage\n",
"storage_client = storage.Client()\n",
"import subprocess\n",
"import pathlib\n",
"import pytz\n",
"from datetime import datetime\n",
"\n",
"storage_client = storage.Client()\n",
"\n",
"\n",
"config.mqc_load_userconfig()\n",
"\n",
"def flat(pool):\n",
Expand Down Expand Up @@ -112,7 +113,7 @@
" workflow_with_submissions = [id_tuple for id_tuples in await asyncio.gather(*tasks) for id_tuple in id_tuples]\n",
"\n",
"if worklow_ids_to_include is not None:\n",
" workflow_with_submissions = [e for e in workflow_with_sumbissions if e[1] in worklow_ids_to_include]\n",
" workflow_with_submissions = [e for e in workflow_with_submissions if e[1] in worklow_ids_to_include]\n",
"print(f'Found {len(workflow_with_submissions)} workflows.')\n",
"\n",
"\n",
Expand Down Expand Up @@ -161,7 +162,7 @@
"print(f'Workflows found: {workflow_names}')\n",
"\n",
"# remove duplicates\n",
"print(f'Removing duplicates/reruns...')\n",
"print('Removing duplicates/reruns...')\n",
"metrics_file_groups = defaultdict(list)\n",
"for wn, on, st in metrics_file_paths:\n",
" metrics_file_groups[(wn, os.path.basename(on))].append((wn, on, st))\n",
Expand Down Expand Up @@ -240,4 +241,4 @@
},
"nbformat": 4,
"nbformat_minor": 2
}
}
2 changes: 0 additions & 2 deletions Utilities/Dockers/Alphashape/generate_alphashape.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import argparse
import pickle
import sys
import numpy as np
import pandas as pd
import alphashape

Expand Down
4 changes: 0 additions & 4 deletions Utilities/Dockers/Alphashape/pca_novelty_detection.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
import argparse
import pickle
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from descartes import PolygonPatch
from shapely.geometry import Point
import alphashape

# Parse command-line arguments
parser = argparse.ArgumentParser(description = "Automatically flag novelties in 2D PCA plots using Concave Hulls generated via alphashapes.")
Expand Down
3 changes: 1 addition & 2 deletions test/choose_watt_tests/choose_watt_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ def resolve_relative_path(rel_path: str) -> str:
return rel_path

def get_wdl_dependencies(womtool_run: subprocess.CompletedProcess):
    """Return the WDL paths listed in a completed womtool run's stdout.

    Args:
        womtool_run: Completed womtool process whose ``stdout`` was captured
            as bytes.

    Returns:
        The decoded stdout lines that end in ``.wdl``, in output order.
        Lines with any other ending are ignored.
    """
    # Filter on the raw bytes so only matching lines are decoded.
    return [line.decode() for line in womtool_run.stdout.splitlines() if line.endswith(b'.wdl')]

if __name__ == '__main__':
args = parser.parse_args()
Expand Down

0 comments on commit 6b5f1a6

Please sign in to comment.