forked from thomasp85/Biotools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathblastAtlasCf
executable file
·91 lines (80 loc) · 3.23 KB
/
blastAtlasCf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/perl
# Authors: Peter Fisher Hallin, Lars Juhl Jensen, Carsten Friis, Anders Gorm Petersen, Alex Bolshoy, Thomas Pedersen
# For license see /usr/biotools/CMG-biotools.license
use Getopt::Long;
my $organism;
my @query;
GetOptions ("ref=s" => \$organism,
"query=s{,}" => \@query);
print "You must define query files. Use -query <queryFiles>. For genome atlases use \"genomeAtlasCf\" instead.\n" unless @query;
print "You must define a reference genome. Use -ref <reference>.gbk.\n" unless defined $organism;
unless (defined $organism && @query){
exit;
}
$organism =~s/.gbk$//;
chomp(my $length = `/usr/biotools/saco_convert -I genbank -O length $organism.gbk`);
my $lengthtext = $length;
$lengthtext =~ s/^([0-9]{1,3})(([0-9]{3})+)$/$a=$1; $b=$2; $b =~ s#([0-9]{3})#,\1#g; $a.$b/e;
open GBK, "${organism}.gbk" or die;
my $name;
while (<GBK>){
if (/^ ORGANISM /) {
s/^ ORGANISM //;
chomp;
$name = $_;
last;
}
}
$name =~ s/["]+//g;
$name =~ s/[']+//g;
open (MYFILE, "> ${organism}.blastatlas.cf");
printf MYFILE ("genomesize $length;
stamp \"BLAST ATLAS\";
legendpage;
noatlas;
noatlasscore;
ann CDS pos 0.0 0.0 1.0 \"CDS +\" fillarrow mark;
ann CDS neg 1.0 0.0 0.0 \"CDS -\" fillarrow mark;
ann rRNA 0.0 1.0 1.0 \"rRNA\" fillarrow mark;
ann rRNA pos 0.0 1.0 1.0 \"rRNA +\" fillarrow mark;
ann rRNA neg 0.0 1.0 1.0 \"rRNA -\" fillarrow mark;
ann tRNA 0.0 1.0 0.0 \"tRNA\" fillarrow mark;
ann tRNA pos 0.0 1.0 0.0 \"tRNA +\" fillarrow mark;
ann tRNA neg 0.0 1.0 0.0 \"tRNA -\" fillarrow mark;
dat $organism.curvature.gz 1 0.0 0.0 0.0 \"Intrinsic Curvature\" boxfilter %d;
dat $organism.ornstein.gz 1 0.0 0.0 0.0 \"Stacking Energy\" boxfilter %d;
dat $organism.travers.gz 1 0.0 0.0 0.0 \"Position Preference\" boxfilter %d;
dat $organism.blast.Direct.gz 1 0.0 0.0 0.0 \"Global Direct Repeats\";
dat $organism.blast.Inverted.gz 1 0.0 0.0 0.0 \"Global Inverted Repeats\";
dat $organism.baseskews.col.gz 3 0.0 0.0 0.0 \"GC Skew\" boxfilter %d;
dat $organism.baseskews.col.gz 4 0.0 0.0 0.0 \"Percent AT\" boxfilter %d;
"), ((0.002*$length), (0.002*$length), (0.002*$length), (0.001*$length), (0.001*$length));
foreach (@query){
print MYFILE "dat $_.genomemap.gz 1 0.0 0.0 0.0 \"$_\";\n";
}
printf MYFILE ("circletics auto;
circletext \"$name\" \"$lengthtext bp\";
circle $organism.baseskews.col.gz 4 \"001010_101010_100000.cm2\" by 0.2 0.8;
circle $organism.baseskews.col.gz 3 \"100010_101010_001010.cm2\" by avg 3.0 dev;
circle $organism.blast.Inverted.gz 1 \"101010_100000.cm2\" by 5.0 7.5;
circle $organism.blast.Direct.gz 1 \"101010_000010.cm2\" by 5.0 7.5;
circle CDS pos, CDS neg, rRNA, tRNA by dir;
circle $organism.travers.gz 1 \"001000_101010_100010.cm2\" by avg 3.0 dev;
circle $organism.ornstein.gz 1 \"001000_101010_100000.cm2\" by avg 3.0 dev;
circle $organism.curvature.gz 1 \"100600_101010_000010.cm2\" by avg 3.0 dev;
");
foreach (@query){
print MYFILE "circle $_.genomemap.gz 1 \"101010_060000.cm2\" by 0.00 100.0;\n";
}
printf MYFILE ("file $organism.curvature.gz dat;
file $organism.ornstein.gz dat;
file $organism.travers.gz dat;
file $organism.ann ann;
file $organism.blast.Direct.gz dat;
file $organism.blast.Inverted.gz dat;
file $organism.baseskews.col.gz dat;
");
foreach (@query){
print MYFILE "file $_.genomemap.gz dat;\n";
}
close MYFILE;