-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathalleleSeqPeak
executable file
·161 lines (136 loc) · 4.38 KB
/
alleleSeqPeak
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/perl
use warnings;
use strict;
use File::Basename;
use Getopt::Long;
use Pod::Usage;
=head1 NAME
alleleSeqPeak
=head1 SYNOPSIS
alleleSeqPeak [options]
-h help
this aggregates the reads by maternal and paternal alleles; the rest of the alleles are disregarded
info is taken from intersecting BED format of AlleleSeq output and peak format:
chrm snppos ref mat_gtype pat_gtype phase mat_all pat_all cA cC cG cT winning SymCls SymPval BindingSite cnv
chr peakStart peakEnd
if all hets (allele = 'None') pat and mat allele counts = 0
Example:
alleleSeqPeak
=head1 DESCRIPTION
=cut
#option variables
my $help;
#initialize options
Getopt::Long::Configure ('bundling');
if(!GetOptions ('h'=>\$help) || scalar(@ARGV)!=1)
{
if ($help)
{
pod2usage(-verbose => 2);
}
else
{
pod2usage(1);
}
}
## read ensembl file
my $ifile = $ARGV[0];
open (INPUT, $ifile) || die "Cannot open $ifile: $!";
## output file
#my($name, $path, $ext) = fileparse($ifile, '\..*');
#my $ofile = "$name.out";
#open (OUTPUT, ">$ofile") || die "Cannot open $ofile: $!";
# variables
my $headerProcessed = 0;
my %peakid2MPcounts;
my %peakid2snpid;
my %peakid2winning;
my %counts;
my %peaks;
my %peakid2snpcount;
## read ensembl file
while (<INPUT>)
{
s/\r?\n?$//;
my @fields = split(/\t/, $_);
chomp @fields;
my $snpid = $fields[0]."-".$fields[1]."-".$fields[2];
my $peakid = $fields[19]."-".$fields[20]."-".$fields[21];
my $mat_all = $fields[8];
my $pat_all = $fields[9];
my $cA = $fields[10];
my $cC = $fields[11];
my $cG = $fields[12];
my $cT = $fields[13];
my $winning = $fields[14];
# initialize store snp id
if(!exists($peakid2snpid{$peakid}))
{
$peakid2snpid{$peakid} = $snpid;
$peakid2snpcount{$peakid} = 1;
}
else
{
$peakid2snpid{$peakid} = "$peakid2snpid{$peakid};$snpid";
$peakid2snpcount{$peakid}++;
}
# initialize store maternal and paternal alleles; note that any other alleles are disregarded
if(!exists($peakid2MPcounts{$peakid}{'mat'}))
{
if($mat_all eq 'A'){ $peakid2MPcounts{$peakid}{'mat'} = $cA; }
elsif($mat_all eq 'C'){ $peakid2MPcounts{$peakid}{'mat'} = $cC; }
elsif($mat_all eq 'G'){ $peakid2MPcounts{$peakid}{'mat'} = $cG; }
elsif($mat_all eq 'T'){ $peakid2MPcounts{$peakid}{'mat'} = $cT; }
else{ $peakid2MPcounts{$peakid}{'mat'} = 0; } ## if all hets pat and mat allele = 0
}
else
{
if($mat_all eq 'A'){ $peakid2MPcounts{$peakid}{'mat'} = $peakid2MPcounts{$peakid}{'mat'} + $cA; }
elsif($mat_all eq 'C'){ $peakid2MPcounts{$peakid}{'mat'} = $peakid2MPcounts{$peakid}{'mat'} + $cC; }
elsif($mat_all eq 'G'){ $peakid2MPcounts{$peakid}{'mat'} = $peakid2MPcounts{$peakid}{'mat'} + $cG; }
elsif($mat_all eq 'T'){ $peakid2MPcounts{$peakid}{'mat'} = $peakid2MPcounts{$peakid}{'mat'} + $cT; }
else{ $peakid2MPcounts{$peakid}{'mat'} = $peakid2MPcounts{$peakid}{'mat'} + 0; } ## if all hets pat and mat allele = 0
}
if(!exists($peakid2MPcounts{$peakid}{'pat'}))
{
if($pat_all eq 'A'){ $peakid2MPcounts{$peakid}{'pat'} = $cA; }
elsif($pat_all eq 'C'){ $peakid2MPcounts{$peakid}{'pat'} = $cC; }
elsif($pat_all eq 'G'){ $peakid2MPcounts{$peakid}{'pat'} = $cG; }
elsif($pat_all eq 'T'){ $peakid2MPcounts{$peakid}{'pat'} = $cT; }
else{ $peakid2MPcounts{$peakid}{'pat'} = 0; } ## if all hets pat and mat allele = 0
}
else
{
if($pat_all eq 'A'){ $peakid2MPcounts{$peakid}{'pat'} = $peakid2MPcounts{$peakid}{'pat'} + $cA; }
elsif($pat_all eq 'C'){ $peakid2MPcounts{$peakid}{'pat'} = $peakid2MPcounts{$peakid}{'pat'} + $cC; }
elsif($pat_all eq 'G'){ $peakid2MPcounts{$peakid}{'pat'} = $peakid2MPcounts{$peakid}{'pat'} + $cG; }
elsif($pat_all eq 'T'){ $peakid2MPcounts{$peakid}{'pat'} = $peakid2MPcounts{$peakid}{'pat'} + $cT; }
else{ $peakid2MPcounts{$peakid}{'pat'} = $peakid2MPcounts{$peakid}{'pat'} + 0; } ## if all hets pat and mat allele = 0
}
# initialize peak id 2 winning
if(!exists($peakid2winning{$peakid}))
{
$peakid2winning{$peakid} = $winning;
}
else
{
$peakid2winning{$peakid} = "$peakid2winning{$peakid};$winning";
}
# initialize peakid
if(!exists($peaks{$peakid}))
{
$peaks{$peakid} = 1;
}
}
close(INPUT);
## printing
print "peakid\tcMat\tcPat\twins\tsnpid\tsnpcounts\n";
for my $key (keys %peaks)
{
print "$key\t".
"$peakid2MPcounts{$key}{'mat'}\t".
"$peakid2MPcounts{$key}{'pat'}\t".
"$peakid2winning{$key}\t".
"$peakid2snpid{$key}\t".
"$peakid2snpcount{$key}\n";
}