-
Notifications
You must be signed in to change notification settings - Fork 0
/
rebin.pl
executable file
·102 lines (81 loc) · 2.4 KB
/
rebin.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env perl
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);
use POSIX qw/floor/;
use List::MoreUtils qw(any);
BEGIN {
use FindBin '$Bin';
require "$Bin/share/Metaheader.pm";
}
# Takes as input the rstmatrix, rebin into genomic distance
if ($#ARGV != 0) {
print STDERR "usage: ./rebin.pl binsize(bp) < matrix > binmatrix\n";
exit;
};
my $binsize = shift @ARGV;
die "Binsize should be a number" if (!looks_like_number($binsize));
# Read the header
my $header = <STDIN>;
chomp($header);
my $metah = Metaheader->new($header);
my @rsts = @{ $metah->{rowinfo} };
warn "Metaheader lacks chromosome information... will rebin a single one"
unless exists $rsts[0]->{chr};
die "Metaheader lacks position information... Sad, I cannot rebin"
unless (exists $rsts[0]->{pos} ||
(exists $rsts[0]->{st} &&
exists $rsts[0]->{en}));
# Make the bin table
my @bins;
my @bintitle;
{
my $binstartp;
my $currchr = "-1";
for my $i (0..$#rsts) {
my $rstpos = $rsts[$i]->{pos} //
floor(($rsts[$i]->{st} + $rsts[$i]->{en}))/2;
my $chrnam = $rsts[$i]->{chr} // "";
# chromosome frontier?
if ($chrnam ne $currchr) {
push @bins, [];
$binstartp = floor($rstpos / $binsize) * $binsize;
my $binpos = $binstartp + floor($binsize/2);
push @bintitle, "\"$chrnam~$binpos\"";
$currchr = $chrnam;
}
# empty bins if no rst is there
while ($binstartp + $binsize < $rstpos) {
push @bins, [];
$binstartp += $binsize;
my $binpos = $binstartp + floor($binsize / 2);
push @bintitle, "\"$chrnam~$binpos\"";
}
push @{ $bins[$#bins] }, $i;
}
}
# Print the header
print join("\t", "\"BIN\"", @bintitle), "\n";
# Cycle over bins (output rows)
for my $binan (0 .. $#bins) {
my @output = (0) x ($#bins - $binan + 1);
print STDERR "\33[2K\rElaborating bin $binan out of $#bins";
# Cycle over bins (column index)
for my $i (0 .. $#{$bins[$binan]}) {
my $line = <STDIN>;
defined $line or die "Input format error";
chomp $line;
my @input = split("\t", $line); shift(@input);
for my $binbn ($binan .. $#bins) {
for my $j (0 .. $#{$bins[$binbn]}) {
next if (($binbn == $binan) && ($j < $i));
my $coln = $bins[$binbn]->[$j] - $bins[$binan]->[$i];
$output[$binbn - $binan] += $input[$coln] //
die "Missing columns in input";
}
}
}
print join("\t", $bintitle[$binan], @output), "\n";
}
print STDERR "\33[2K\rEND\n";
0;