forked from NBISweden/ProtExcluder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GCcontent.pl
executable file
·123 lines (112 loc) · 2.13 KB
/
GCcontent.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#! /usr/bin/perl -w
use strict;
use warnings;

# GCcontent.pl - count the nucleotides in the sequences of a FASTA file.
#
# Usage: GCcontent.pl input-fasta-file
#
# Output (stdout):
#   * one line per record: the record ID, a tab, then the zero-padded counts
#     of A, C, G, T and "N" (anything that is not A/C/G/T, either case),
#     followed by the GC ratio of that record as a fraction (0..1);
#   * a four-line summary over all records: a column header, the totals,
#     the AT and GC totals, and the overall GC content as a percentage.
#
# Note that the per-record value is a fraction while the summary value is a
# percentage; both are kept as they are so existing consumers of the output
# keep working.

# Return a fresh tally with every counter set to zero.
sub new_tally {
    return { A => 0, C => 0, G => 0, T => 0, N => 0 };
}

# Count the characters of $text.
#   $text - sequence text (no line terminators expected)
# Returns a hash ref with keys A, C, G, T (case-insensitive matches) and N
# (every remaining character).
sub count_bases {
    my ($text) = @_;

    # tr/// with an empty replacement list only counts; $text is not changed.
    my %count = (
        A => ($text =~ tr/Aa//),
        C => ($text =~ tr/Cc//),
        G => ($text =~ tr/Gg//),
        T => ($text =~ tr/Tt//),
    );
    $count{N} = length($text)
        - ($count{A} + $count{C} + $count{G} + $count{T});
    return \%count;
}

# Add every counter of tally $from onto tally $into (modified in place).
sub add_counts {
    my ($into, $from) = @_;
    $into->{$_} += $from->{$_} for qw(A C G T N);
    return;
}

# Return (C+G)/(A+C+G+T) for a tally, or 0 when it holds no A/C/G/T at all,
# so callers never divide by zero.
sub gc_fraction {
    my ($count) = @_;
    my $acgt = $count->{A} + $count->{C} + $count->{G} + $count->{T};
    return $acgt > 0 ? ($count->{C} + $count->{G}) / $acgt : 0;
}

# Build the output line of one record.
#   $name  - record ID, or undef for sequence text that precedes the first
#            header (such text is reported without an ID prefix)
#   $count - tally of the record
# Returns the complete line including the trailing newline.
sub format_record {
    my ($name, $count) = @_;
    my $prefix = defined $name ? "$name\t" : '';
    return $prefix . sprintf(
        "%06d %06d %06d %06d %06d %.3f\n",
        $count->{A}, $count->{C}, $count->{G}, $count->{T}, $count->{N},
        gc_fraction($count),
    );
}

# Build the four-line summary for the tally of the whole file.
# The GC percentage is reported as 0.000 when the file contains no A/C/G/T.
sub format_summary {
    my ($total) = @_;
    my $at       = $total->{A} + $total->{T};
    my $gc       = $total->{C} + $total->{G};
    my $no_n     = $at + $gc;
    my $with_n   = $no_n + $total->{N};

    return "A C G T N totalnoN total\n"
        . sprintf(
            "%08d %08d %08d %08d %08d %08d %08d\n",
            $total->{A}, $total->{C}, $total->{G}, $total->{T}, $total->{N},
            $no_n, $with_n,
        )
        . sprintf("AT %08d GC %08d\n", $at, $gc)
        . sprintf("GC is %.3f\n", 100 * gc_fraction($total));
}

# Entry point: read the FASTA file named by the first argument and print the
# per-record lines followed by the summary. Dies with the usage text when no
# file is given or the file cannot be opened.
sub main {
    my @args = @_;
    my $usage = "GCcontent.pl input-fasta-file \n";

    die $usage if @args < 1;
    my $path = $args[0];

    # Three-argument open: the file name can never be taken as a mode or pipe.
    open(my $fh, '<', $path)
        or die "Cannot open '$path': $!\n$usage";

    my $total   = new_tally();
    my $current = new_tally();
    my $name;             # undef until the first header line has been read
    my $has_data = 0;     # true once the open record has sequence text

    while (my $line = <$fh>) {
        if ($line =~ /^>\s*(\S*)/) {
            my $next_name = $1;

            # Close the previous record. Every record gets its own line,
            # even an empty one, so IDs never run into each other.
            if (defined $name || $has_data) {
                print format_record($name, $current);
                add_counts($total, $current);
            }
            ($name, $current, $has_data) = ($next_name, new_tally(), 0);
        }
        else {
            # Drop line terminators (LF and CRLF) and any other whitespace
            # so they are not tallied as unknown bases.
            $line =~ s/\s+//g;
            next if $line eq '';

            # Counting line by line avoids holding a whole sequence in memory.
            add_counts($current, count_bases($line));
            $has_data = 1;
        }
    }
    close $fh;

    # The last record has no following header to trigger its report.
    if (defined $name || $has_data) {
        print format_record($name, $current);
        add_counts($total, $current);
    }

    print format_summary($total);
    return;
}

# Run only when executed as a program, so the helpers above can be loaded
# (e.g. by a test) without the script demanding a file argument.
main(@ARGV) unless caller;

1;