-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnoiseMeasure.py
120 lines (102 loc) · 4.5 KB
/
noiseMeasure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
import numpy as np
import argparse
import subprocess
import math
import statsmodels
from statsmodels import tsa
from statsmodels.tsa import stattools
import warnings
#warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser(description='Tool used to either downsample or upsample Hi-C reads\n\n')
parser.add_argument("-i", "--hicfile", dest="hic_file", help="Input .hic file(s), can be comma separated list\n\n", metavar="FILE", required=True)
parser.add_argument('--res', dest='res', metavar='str', help="Resolution(s) to process the Hi-C file, can be comma separated list\n\n", required=True)
parser.add_argument('-c', dest='chrom', default = "1", help="Chromosome to examine\n\n")
parser.add_argument('-j', dest="juicertools", help="Path to juicer tools\n\n", required=True)
parser.add_argument('-o', dest="outputfile", default=0, help="Path to output file. If none specified, will output to terminal\n\n")
args = parser.parse_args()
inputfiles = args.hic_file
myfiles = inputfiles.split(",")
myfiles = [str(x) for x in myfiles]
inputres = args.res
myres = inputres.split(",")
myres = [str(x) for x in myres]
fullnoiselist=[[]]*len(myres)
mychr = str(args.chrom)
startbin = 1000000
endbin = 10000000
for r in range(0,len(myres)):
res=float(myres[r])
binnum = int(float(endbin-startbin)/res)
#endbin = startbin+(binnum*res)
noiselist=[]
intres=int(res)
location=":".join([str(mychr),str(int(startbin)),str(int(endbin))])
print(location)
startoffset = int(startbin/res)
for interactionfile in myfiles:
print("processing " + str(interactionfile) + " at " + str(intres) + " resolution")
expecteds=[]
with subprocess.Popen(['java', '-jar', args.juicertools, 'dump', 'expected', 'NONE', interactionfile, mychr, 'BP', str(intres)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
output, errors = p.communicate()
lines = output.decode('utf-8').splitlines()
for line in lines:
myE = line.split()
if myE[0] == "WARN":
continue
if myE[0] == "WARNING:":
continue
if myE[0] == "INFO":
continue
#if re.search(r'WARN', str(myE[0])):
# continue
expecteds.append(float(myE[0]))
#print("expecteds from juicer")
#print(expecteds[0:10])
matsize=int((endbin-startbin)/intres+1)
dense=np.zeros((matsize,matsize))
with subprocess.Popen(['java', '-jar', args.juicertools, 'dump', 'observed', 'NONE', interactionfile, location, location, 'BP', str(intres)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
output, errors = p.communicate()
resultstmp = output.decode('utf-8').splitlines()
for result in resultstmp:
re = result.split()
if re[0] == "INFO":
continue
if re[0] == "WARN":
continue
if re[0] == "WARNING:":
continue
#if re.search(r'WARN', str(myE[0])):
# continue
try:
left = math.floor(int(re[0])/res) - startoffset
except ValueError:
print(re[0])
right = math.floor(int(re[1])/res) - startoffset
dist = int((int(re[1])-int(re[0]))/res)
score = (float(re[2])+1)/(float(expecteds[dist])+1)
dense[left,right] = score
dense[right,left] = score
#print(dense[0:10,0:10])
noise=[]
for i in range(1,len(dense)):
try:
mylist = dense[i-1,:].tolist()
#noise.append(statsmodels.tsa.stattools.acf(dense[i-1:i,:], nlags=1)[1])
noise.append(statsmodels.tsa.stattools.acf(mylist,nlags=1)[1])
#print(statsmodels.tsa.stattools.acf(mylist,nlags=1))
#print(noise[-1])
except:
continue
noiselist.append(1/np.nanmean(noise))
fullnoiselist[r] = noiselist
if args.outputfile == 0:
for j in range(0,len(myres)):
for h in range(0,len(myfiles)):
print(str(myfiles[h]) + " at " + str(myres[j]) + " resolution, the noise is: " + str(fullnoiselist[j][h]))
else:
with open(args.outputfile, "w") as ofile:
for j in range(0,len(myres)):
for h in range(0,len(myfiles)):
linetowrite=(str(myfiles[h]) + "\t" + str(myres[j]) + "\t" + str(fullnoiselist[j][h]) + "\n")
ofile.write(linetowrite)