subtract_sum.py
#!/usr/bin/env python
'''
Space Telescope Science Institute
Synopsis:
Read the summary file with various switches and construct
an html version of the summary file so that the results
can be easily inspected.
Command line usage :
subtract_sum.py
-h print this help information
-prog_id xxxx construct html file for prog id xxxx
-all construct for all datasets that have been processed
-today construct for datasets processed today
-start construct for datasets taken after a given date in mjd or iso format
-stop construct for datasets taken before date in mjd or iso format
If subtract_sum.py is called with no options, the program will create a summary file for
the last hour's work.
Description:
The routine reads the sum file to find the records that have been
completed, as selected by the command-line switches, and makes a very simple html file
to allow one to navigate the individual html files easily
Primary routines:
Notes:
History:
110104 ksl Coding begun
110201 ksl help changed
110428 ksl Rewrote using stand-alone routines, not markup.py, to allow for making a table
111115 ksl Began adding routines to include a figure of how bad the persistence is
on average
'''
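# Example invocations (illustrative only; the program id and dates below are
# placeholders, not real datasets):
#   subtract_sum.py                            # summarize the last hour's processing
#   subtract_sum.py -all                       # summarize every processed dataset
#   subtract_sum.py -prog_id 12345             # restrict to a single (hypothetical) program id
#   subtract_sum.py -start 55500 -stop 55600   # datasets taken in an MJD interval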
# Added so this can run in the background without a $DISPLAY
# environment variable.
import matplotlib
matplotlib.use('Agg')
import sys
import date
import os
import per_list
import numpy
import html
import pylab
def link2file(link,word=''):
'''
Make the html to include a link
'''
if word == '':
word=link
string='<a href="file:%s" > %s </a>' % (link,word)
return string
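# For example, link2file('ib6v01abc_persist.html','ib6v01abc') would return
# '<a href="file:ib6v01abc_persist.html" > ib6v01abc </a>'; the rootname and
# filename here are hypothetical.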
def make_html(lines,filename='observations.html'):
'''
This routine makes the summary html file.
110428 ksl This routine was written because markup.py did not seem to be able to handle
a simple table.
'''
header='''<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=ISO-8859-1">
<title>Summary Evaluation Page of Persistence</title>
</head>
<body>
<p>This page contains links to the individual html files for each
dataset</p>
<hr size="3" width="100%">
<table border="1" cellpadding="2" cellspacing="2" width="100%">
<tbody>
'''
string=header
for line in lines:
row='<tr>\n'
for word in line:
row=row+'<td> %s </td>\n' % word
row=row+'</tr>\n'
string=string+row
trailer='''
</tbody>
</table>
<hr size="3" width="100%">
</body>
</html>
'''
string=string+trailer
g=per_list.open_file(filename)
# g=open(filename,'w')
# os.chmod(filename,0770)
g.write('%s\n' % string)
return
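# make_html expects a list of rows, each row itself a list of cell strings, e.g.
#   make_html([['Rootname','Prog Id'],[link2file('ib6v01abc_persist.html','ib6v01abc'),'12345']])
# (the rootname, filename and program id above are placeholders).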
def read_sum_file(fileroot='observations',status='Complete',prog_id=0,mjd_start=0,mjd_stop=0,proc_start=0,proc_stop=0):
'''
Read the summary file and return selected sets of records in the observations.sum with the selection determined by
prog_id program_id (a single number)
mjd_start datasets taken after mjd_start
mjd_stop datasets taken before mjd_stop
proc_start datasets processed after proc_start
proc_stop datasets processed before proc_stop
In general, if a value is 0, it is not used in selecting the data set
111011 ksl Modified the way the routine works so that to select something on the basis of status,
one just needs to include the string that is in status. This is to accommodate versioning.
NOTE -- It is not obvious that one should check for status at all, since one usually wants
to see what happened to a series of files. What is more relevant is the program ID or
whether one attempted to process it in a certain period.
'''
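# Layout of a .sum record as it is indexed below and in doit() (inferred from the
# code; the real file may carry additional columns):
#   z[0]  rootname of the dataset
#   z[1]  program id
#   z[2]  mjd of the observation
#   z[3],z[4]  date and time the dataset was processed
#   z[5]  status string
#   z[6:12]  persistence statistics (Ext1..Ext3, Int1..Int3)
#   z[12]  path to the html file for the dataset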
filename=fileroot+'.sum'
try:
f=open(filename,'r')
xlines=f.readlines()
f.close()
except IOError :
print "The file %s does not exist" % filename
return []
lines=[]
# print 'OK0',len(xlines)
i=0
while i<len(xlines):
z=xlines[i].strip()
z=z.split()
if len(z)>0:
if z[0][0]!='#':
# print z[5].count(status),z
if z[5].count(status) or status=='All':
lines.append(z)
i=i+1
# print 'OK1',len(lines)
# At this point we have split the records into words and we have chosen according to status
if prog_id != 0:
xlines=lines
lines=[]
for z in xlines:
if int(z[1])==int(prog_id):
lines.append(z)
# At this point our list has been reduced to those of the status we want and the prog_id we want
# print 'OK2',len(lines)
# 130103 - Added to fix bug where start time is provided but no end time
if mjd_start>0 and mjd_stop==0:
mjd_stop=mjd_start+1e6 # A large number
if mjd_stop > 0 and mjd_start < mjd_stop:
xlines=lines
lines=[]
for z in xlines:
xtime=float(z[2])  # the MJD of the observation; float() is safer than eval() here
if mjd_start <= xtime and xtime <= mjd_stop:
lines.append(z)
# Convert proc start and stop time to MJD
# print 'OK3',len(lines)
if proc_start!=0:
try:
proc_start=float(proc_start)
except ValueError:
proc_start=date.parse_iso(proc_start)
if proc_stop!=0:
try:
proc_stop =float(proc_stop )
except ValueError:
proc_stop =date.parse_iso(proc_stop )
# Mirror the mjd fix above: a start time with no stop time means "everything after the start"
if proc_start > 0 and proc_stop == 0:
proc_stop=proc_start+1e12  # A large number
if proc_stop != 0 or proc_start < proc_stop:
xlines=lines
lines=[]
for z in xlines:
# print z
# Construct the process time
proc_time='%s %s' % (z[3],z[4])
# print 'Starting',proc_time
proc_time=date.parse_iso(proc_time) # Convert to Mjd
# print proc_time, 'xxx',proc_start,proc_stop
if proc_start <= proc_time and proc_time<=proc_stop:
lines.append(z)
# print 'OK4',len(lines)
return lines
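# Example (hypothetical values): read_sum_file('observations',status='Complete',
# mjd_start=55500,mjd_stop=55600) returns the split records for all completed
# datasets taken in that MJD window.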
def worst(lines,records,nmax=500):
'''
Find the datasets with the most persistence and write information regarding these
to the file subtract_eval_worst.txt.
The selection is made on the fraction of pixels with persistence EXT2.
111110 ksl Modified to eliminate certain programs that are ringers, like ksl's persistence testing programs
111116 ksl Moved censoring to the main routine, and changed inputs to reflect this
'''
xlines=lines
xx=[]
for one in xlines:
xx.append(float(one[7]))  # one[7] is the Ext2 statistic used to rank the datasets
xx=numpy.array(xx)
order=numpy.argsort(xx)
n=0
i=len(order)-1
g=open('subtract_eval_worst.txt','w')
table=[]
while i>=0:
# line=per_list.read_ordered_list_one(dataset=xlines[order[i]][0])
record=records[order[i]]  # keep the .ls record paired with the sorted summary line
# print record
string1= '%50s %10s %8s %8s %10s %5s %10s %20s %20s ' % (record[0],record[1],record[2],record[3],record[9],record[10],record[11],record[14],record[16])
one=xlines[order[i]]
# print records[order[i]]
string2= '%8s %8s %8s %8s %8s %8s %30s' % (one[6],one[7],one[8],one[9],one[10],one[11],one[12])
string=string1+string2
# print string
g.write('%s\n' % string)
n=n+1
if n>nmax:
break
tline=[]
tline.append(html.link(record[1],one[12]))
tline.append(record[2])
tline.append(record[3])
tline.append(record[9])
tline.append(record[10])
tline.append(record[11])
tline.append(record[14])
tline.append(record[16])
tline.append(one[6])
tline.append(one[7])
tline.append(one[8])
tline.append(one[9])
tline.append(one[10])
tline.append(one[11])
table.append(tline)
i=i-1
g.close()
string=html.begin('Worst Affected by Persistence')
string=string+html.table(table)
string=string+html.end()
g=open('Worst.html','w')
g.write(string)
g.close()
return
def make_fig(lines):
'''
Create a figure showing how common various levels of persistence are
'''
values=[]
for one in lines:
values.append(one[6:12])
values=numpy.array(values)
values=numpy.asfarray(values) # convert everything to floats
values=numpy.transpose(values)
label=['Ext > 0.1 e/s','Ext > 0.03 e/s','Ext > 0.01 e/s','Int > 0.1 e/s','Int > 0.03 e/s','Int > 0.01 e/s']
# print values[0]
# print len(values)
i=0
pylab.figure(1,(6,6))
pylab.clf()
while i<len(values):
y=numpy.ravel(values[i])
# print y
z=numpy.sort(y)
z=numpy.flipud(z)
x=numpy.linspace(0.,100.,num=len(z))
# print x
# print z
pylab.plot(x,z,label=label[i])
pylab.xlabel('% of images')
pylab.ylabel('% of pixels')
pylab.legend(loc='upper right')
pylab.axis([0,100,0,10])
i=i+1
pylab.draw()
pylab.savefig('persist_stats.png')
# Just plot external persistence
pylab.figure(2,(6,6))
pylab.clf()
i=0
while i<3:
y=numpy.ravel(values[i])
# print y
z=numpy.sort(y)
z=numpy.flipud(z)
x=numpy.linspace(0.,100.,num=len(z))
# print x
# print z
pylab.plot(x,z,label=label[i])
pylab.xlabel('% of images')
pylab.ylabel('% of pixels')
pylab.legend(loc='upper right')
pylab.axis([0,25,0,10])
i=i+1
pylab.draw()
pylab.savefig('persist_ext_stats.png')
return
def doit(fileroot='observations',status='Complete',prog_id=0,mjd_start=0,mjd_stop=0,proc_start=0,proc_stop=0,censor='yes'):
'''
This is the main routine for the subtract_sum program
It calls the routine to read the main parts of the summary file and then reformats the results
so a table can be made. It then calls the routine that makes the html file, and another routine
that attempts to locate examples of the worst persistence, and another to make a figure.
If censor=='yes', then certain calibration programs and all Tungsten lamp exposures are
excluded from analysis
'''
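# A typical interactive call might look like the following; the program id is a
# placeholder, not a real one:
#   doit(fileroot='observations',status='Complete',prog_id=12345,censor='yes')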
# print 'start,stop',mjd_start,mjd_stop
# lines=read_sum_file(fileroot,status,prog_id)
lines=read_sum_file(fileroot,status,prog_id,mjd_start,mjd_stop,proc_start,proc_stop)
xlines=[]
for line in lines:
if line[5].count('Compl'):
xlines.append(line)
lines=xlines
print 'Records to process (before censoring):',len(lines)
if len(lines)==0:
print 'There are no lines to process, so returning'
return
# Now get the corresponding lines in the observations.ls file
records=[]
for line in lines:
one=per_list.read_ordered_list_one(dataset=line[0])
records.append(one)
# So now we have two parallel lists, one containing the summary file records and one containing the .ls file records
xlines=[]
title=['Rootname','Prog Id','Date_Proc','Time_Proc','Status','Ext1','Ext2','Ext3','Int1','Int2','Int3']
xlines.append(title)
for line in lines:
print line
xline=[link2file(line[12],line[0])]
xline.append(line[1])
xline.append(line[3])
xline.append(line[4])
xline.append(line[5])
xline=xline+line[6:12]
xlines.append(xline)
# print 'How many',len(xlines)
make_html(xlines)
# Now censor the list to produce the worst
ignore=[11423,11915,11930,12351,12338,11931,11696,12283]
if censor=='yes':
xlines=[]
xrecords=[]
i=0
while i<len(lines):
ok='yes'
prog_id=int(lines[i][1])  # the .sum file stores the program id as a string, but the ignore list holds ints
for value in ignore:
# print value, prog_id
if value==prog_id:
ok='no'
break
if records[i][14]=='TUNGSTEN':
ok='no'
if ok=='yes':
xlines.append(lines[i])
xrecords.append(records[i])
i=i+1
lines=xlines
records=xrecords
print 'Records to process (after censoring) :',len(lines)
# print lines[0]
# print lines[len(lines)-1]
worst(lines,records)
make_fig(lines)
return
def steer(argv):
'''
Steering routine for this program, which is intended to make it easier
to inspect the results of persistence subtraction.
'''
fileroot='observations'
status='Complete'
prog_id=0
mjd_start=0
mjd_stop=0
proc_start=0
proc_stop=0
i=1
while i<len(argv):
if argv[i]=='-h':
print __doc__
return
elif argv[i]=='-prog_id':
i=i+1
prog_id=int(argv[i])
elif argv[i]=='-all':
status='All'
elif argv[i]=='-status':
i=i+1
status=argv[i]
elif argv[i]=='-start':
i=i+1
z=argv[i]
try:
mjd_start=float(z)
except ValueError:
mjd_start=date.iso2mjd(z)
print 'Start',z,mjd_start
elif argv[i]=='-stop':
i=i+1
z=argv[i]
try:
mjd_stop=float(z)
except ValueError:
mjd_stop=date.iso2mjd(z)
print 'Stop',z,mjd_stop
elif argv[i]=='-today':
# today=date.get_gmt('%Y-%m-%d')
today=date.get_gmt()
today=date.parse_iso(today)
proc_start=today-86400
proc_stop=today
else:
print 'Unknown switch %s' % argv[i]
return
i=i+1
# parse_iso returns a unix time in seconds since Epoch0
if len(argv)==1:
today=date.get_gmt()
# print 'now',today
today=date.parse_iso(today)
proc_start=today-3600
proc_stop=today
print 'Creating summary file for records processed in the last hour'
print proc_start, proc_stop
doit(fileroot,status,prog_id,mjd_start,mjd_stop,proc_start,proc_stop)
return
# Next lines permit one to run the routine from the command line
if __name__ == "__main__":
import sys
if len(sys.argv)>1:
steer(sys.argv)
else:
# sys.argv.append('-h')
steer(sys.argv)