Skip to content

Commit

Permalink
Flit pyproject toml, precommit, -biopython +dnaio, Python 3.8<=3.13
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Nov 27, 2024
1 parent dad7f1a commit c3234bb
Show file tree
Hide file tree
Showing 10 changed files with 633 additions and 497 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
name: tests
on: [push]

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04, macos-12]
python-version: ['3.8', '3.13']
name: Python ${{ matrix.python-version }} (${{ matrix.os }})
steps:
- uses: actions/checkout@v2
- name: Setup conda
uses: s-weigand/setup-conda@v1
with:
update-conda: true
python-version: ${{ matrix.python-version }}
conda-channels: conda-forge, bioconda
- name: Install
run: |
pip install '.[dev]'
- name: Test
run: |
python -m pytest
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.4
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,23 @@ Kindel reconciles substitutions and CIGAR-described indels to produce a major

## Installation

```python
# Requires Python 3.6+
```shell
# Requires Python 3.8+ and Samtools
pip install kindel
```
For a complete installation using a conda-compatible package manager:

```
conda create -y -n kindel python=3.13 samtools
conda activate kindel
pip install kindel
```
Dependencies should be installed automatically, except for Samtools, which is needed for BAM input.

For a local development install:

```
pip install --editable '.[dev]' # pip install kindel '.[dev]'
```



Expand Down
4 changes: 3 additions & 1 deletion kindel/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
__version__ = '0.4.4'
"""Indel-aware consensus from aligned BAMs"""

__version__ = "1.0.0"
105 changes: 55 additions & 50 deletions kindel/cli.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,83 @@
import sys
import argh

from Bio import SeqIO

from kindel import kindel

from kindel import __version__


def consensus(bam_path: 'path to SAM/BAM file',
realign: 'attempt to reconstruct reference around soft-clip boundaries'=False,
min_depth: 'substitute Ns at coverage depths beneath this value'=1,
min_overlap: 'match length required to close soft-clipped gaps'=7,
clip_decay_threshold: 'read depth fraction at which to cease clip extension'=0.1,
mask_ends: 'ignore clip dominant positions within n positions of termini'=50,
trim_ends: 'trim ambiguous nucleotides (Ns) from sequence ends'=False,
uppercase: 'close gaps using uppercase alphabet'=False):
'''Infer consensus sequence(s) from alignment in SAM/BAM format'''
result = kindel.bam_to_consensus(bam_path,
realign,
min_depth,
min_overlap,
clip_decay_threshold,
mask_ends,
trim_ends,
uppercase)
print('\n'.join([r for r in result.refs_reports.values()]), file=sys.stderr)
SeqIO.write(result.consensuses, sys.stdout,'fasta')


def weights(bam_path: 'path to SAM/BAM file',
relative: 'output relative nucleotide frequencies'=False,
no_confidence: 'skip confidence calculation'=False):
'''Returns table of per-site nucleotide frequencies and coverage'''
def consensus(
    bam_path: "path to SAM/BAM file",
    realign: "attempt to reconstruct reference around soft-clip boundaries" = False,
    min_depth: "substitute Ns at coverage depths beneath this value" = 1,
    min_overlap: "match length required to close soft-clipped gaps" = 7,
    clip_decay_threshold: "read depth fraction at which to cease clip extension" = 0.1,
    mask_ends: "ignore clip dominant positions within n positions of termini" = 50,
    trim_ends: "trim ambiguous nucleotides (Ns) from sequence ends" = False,
    uppercase: "close gaps using uppercase alphabet" = False,
):
    """Infer consensus sequence(s) from alignment in SAM/BAM format"""
    # NOTE(review): the docstring and the string annotations above are kept
    # verbatim; this function is registered with argh in main(), which
    # presumably renders them as CLI help text -- confirm before rewording.
    result = kindel.bam_to_consensus(
        bam_path,
        realign,
        min_depth,
        min_overlap,
        clip_decay_threshold,
        mask_ends,
        trim_ends,
        uppercase,
    )
    # Per-reference reports go to stderr so that stdout carries only FASTA.
    # str.join accepts any iterable, so the values view is passed directly.
    print("\n".join(result.refs_reports.values()), file=sys.stderr)
    # Emit each consensus as a FASTA record: ">name" header, then the sequence
    # on a single (unwrapped) line.
    for consensus_record in result.consensuses:
        print(f">{consensus_record.name}")
        print(consensus_record.sequence)


def weights(
bam_path: "path to SAM/BAM file",
relative: "output relative nucleotide frequencies" = False,
no_confidence: "skip confidence calculation" = False,
):
"""Returns table of per-site nucleotide frequencies and coverage"""
weights_df = kindel.weights(bam_path, relative, no_confidence)
weights_df.to_csv(sys.stdout, sep='\t', index=False)
weights_df.to_csv(sys.stdout, sep="\t", index=False)


def features(bam_path: 'path to SAM/BAM file'):
'''Returns table of per-site nucleotide frequencies and coverage including indels'''
def features(bam_path: "path to SAM/BAM file"):
"""Returns table of per-site nucleotide frequencies and coverage including indels"""
weights_df = kindel.features(bam_path)
weights_df.to_csv(sys.stdout, sep='\t', index=False)
weights_df.to_csv(sys.stdout, sep="\t", index=False)


def variants(bam_path: 'path to SAM/BAM file',
abs_threshold: 'absolute frequency (0-∞) threshold above which to call variants'=1,
rel_threshold: 'relative frequency (0.0-1.0) threshold above which to call variants'=0.01,
only_variants: 'exclude invariant sites from output'=False,
absolute: 'report absolute variant frequencies'=False):
'''Output variants exceeding specified absolute and relative frequency thresholds'''
variants_df = kindel.variants(bam_path, abs_threshold, rel_threshold, only_variants, absolute)
variants_df.to_csv(sys.stdout, sep='\t', index=False, na_rep=0)
def variants(
bam_path: "path to SAM/BAM file",
abs_threshold: "absolute frequency (0-∞) threshold above which to call variants" = 1,
rel_threshold: "relative frequency (0.0-1.0) threshold above which to call variants" = 0.01,
only_variants: "exclude invariant sites from output" = False,
absolute: "report absolute variant frequencies" = False,
):
"""Output variants exceeding specified absolute and relative frequency thresholds"""
variants_df = kindel.variants(
bam_path, abs_threshold, rel_threshold, only_variants, absolute
)
variants_df.to_csv(sys.stdout, sep="\t", index=False, na_rep=0)


def plot(bam_path: 'path to SAM/BAM file'):
'''Plot sitewise soft clipping frequency across reference and genome'''
def plot(bam_path: "path to SAM/BAM file"):
"""Plot sitewise soft clipping frequency across reference and genome"""
return kindel.plotly_clips(bam_path)


def version():
'''Show version'''
return f"kindel {__version__}"
"""Show version"""
return f"kindel {__version__}"


def main():
parser = argh.ArghParser()
parser.add_commands([consensus,
weights,
features,
variants,
plot,
version])
parser.add_commands([consensus, weights, features, variants, plot, version])
parser.dispatch()


if __name__ == '__main__':
if __name__ == "__main__":
main()
41 changes: 0 additions & 41 deletions kindel/debug.py

This file was deleted.

Loading

0 comments on commit c3234bb

Please sign in to comment.