Skip to content

Commit

Permalink
Add --minscore to report '-' scheme when not enough alleles
Browse files Browse the repository at this point in the history
  • Loading branch information
tseemann committed Feb 14, 2017
1 parent 1d3e93d commit 279c786
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 12 deletions.
25 changes: 16 additions & 9 deletions bin/mlst
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@ use MLST::FileUtils qw(is_genbank genbank_to_fasta is_gzipped gunzip_file);
#..............................................................................
# Globals

my $VERSION = "2.7b";
my $VERSION = "2.8";
my $SEP = '/';
my $OUTSEP = "\t";

#..............................................................................
# Command line options

my(@Options, $debug, $quiet, $blastdb, $datadir, $threads, $exclude,
$list, $longlist, $scheme, $minid, $mincov, $csv, $nopath, $novel);
$list, $longlist, $scheme,
$minid, $mincov, $minscore,
$csv, $nopath, $novel);
setOptions();

#..............................................................................
Expand Down Expand Up @@ -181,17 +183,21 @@ sub find_mlst {
}

# find the signature with the fewest missing/approximate alleles
my @sig = ( [ ($scheme || '-'), '-', join("/", ('-')x7), -666 ] ); # sentinel
my @sig = ( [ ($scheme || '-'), '-', join("/", ('-')x7), 0 ] ); # sentinel

for my $name (keys %res) {
my $sig = $scheme{$name}->signature_of($res{$name});
my $ST = $scheme{$name}->sequence_type($sig);
my $score = 0;
$score -= 1*($sig =~ tr/~/~/);
$score -= 2*($sig =~ tr/?/?/);
$score -= 3*($sig =~ tr/-/-/);
push @sig, [ $name, $ST, $sig, $score ];
msg("SCORE=$score\t$name\t$ST\t$sig") if $debug;
my $nlocii = $scheme{$name}->num_genes;
# score is out of 100
# penalties are given as below for inexact alleles
my $score = $nlocii;
$score -= 0.2*($sig =~ tr/~/~/);
$score -= 0.5*($sig =~ tr/?/?/);
$score -= 1.0*($sig =~ tr/-/-/);
$score = int($score * 100 / $nlocii);
msg("SCORE=$score\t$name\t$ST\t$sig\t($nlocii genes)") if $debug;
push @sig, [ $name, $ST, $sig, $score ] if $score > $minscore;
}

@sig = sort {
Expand Down Expand Up @@ -233,6 +239,7 @@ sub setOptions {
{OPT=>"longlist!", VAR=>\$longlist, DEFAULT=>0, DESC=>"List allelles for all MLST schemes"},
{OPT=>"minid=f", VAR=>\$minid, DEFAULT=>95, DESC=>"DNA %identity of full allelle to consider 'similar' [~]"},
{OPT=>"mincov=f", VAR=>\$mincov, DEFAULT=>10, DESC=>"DNA %cov to report partial allele at all [?]"},
{OPT=>"minscore=f", VAR=>\$minscore, DEFAULT=>20, DESC=>"Minumum score out of 100 to match a scheme"},
{OPT=>"threads=i", VAR=>\$threads, DEFAULT=>1, DESC=>"Number of BLAST threads (suggest GNU Parallel instead)"},
{OPT=>"csv!", VAR=>\$csv, DEFAULT=>0, DESC=>"Output CSV instead of TSV"},
{OPT=>"nopath!", VAR=>\$nopath, DEFAULT=>0, DESC=>"Strip filename paths from FILE column"},
Expand Down
4 changes: 1 addition & 3 deletions perl5/MLST/FileUtils.pm
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ sub is_gzipped {
my($infile) = @_;
my($magic) = qx(file --brief --dereference \Q$infile\E);
return $magic =~ m/gzip/ ? 1 : 0;
# open my $in, '<', $infile;
# my $magic;
# read $in, $buffer, 2;
}

#----------------------------------------------------------------------
Expand Down Expand Up @@ -46,6 +43,7 @@ sub genbank_to_fasta {
chomp;
if (m{^//}) {
print $out ">gi|$gi|gb|$acc| $def [$org]\n$dna";
# print STDERR "[genbank_to_fasta] $gi $acc $def $org\n";
$in_seq = 0;
$dna = $gi = $acc = $org = $def = '';
next;
Expand Down

0 comments on commit 279c786

Please sign in to comment.