forked from fbennett/legal-resource-registry
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnormalize-ids.py
executable file
·53 lines (43 loc) · 1.34 KB
/
normalize-ids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/python
import os,sys,json,re
from LRR.utils import Utils
# Option lines inserted directly beneath the directive that opens an index
# file; "%s" receives the generated identifier.
# NOTE(review): the leading whitespace of these literals is reproduced as
# seen during review -- confirm it matches the indentation used for directive
# options in the existing index.txt files before merging.
CATEGORY_ID_TEMPLATE = ' :category-id: %s'
COURT_ID_TEMPLATE = ' :court-id: %s'


def scan_index_lines(raw_lines):
    """Drop stale id options from index lines and classify the directive.

    Args:
        raw_lines: iterable of lines as read from an ``index.txt`` file
            (trailing newlines are allowed and are stripped).

    Returns:
        A ``(lines, template, is_court)`` tuple:

        * ``lines`` -- the input lines, right-stripped, without leading
          blank lines and without any line containing ``:category-id:`` or
          ``:court-id:``.
        * ``template`` -- the id option template matching the first
          non-blank line, or ``None`` when that line is neither a
          ``.. category:: `` nor a ``.. court:: `` directive.
        * ``is_court`` -- ``True`` only when the first non-blank line is a
          ``.. court:: `` directive.
    """
    kept = []
    template = None
    is_court = False
    seen_content = False
    for raw in raw_lines:
        # Existing id options are discarded so a fresh one can be inserted.
        if ':category-id:' in raw or ':court-id:' in raw:
            continue
        line = raw.rstrip()
        if not seen_content:
            if not line:
                # Blank lines ahead of the first content line are dropped.
                continue
            seen_content = True
            # Only the first content line decides the kind of index file.
            if line.startswith('.. category:: '):
                template = CATEGORY_ID_TEMPLATE
            elif line.startswith('.. court:: '):
                is_court = True
                template = COURT_ID_TEMPLATE
        kept.append(line)
    return kept, template, is_court


def id_components(dirpath):
    """Return the components of *dirpath* after its first two segments.

    The walk root is always ``data/courts/<jurisdiction>``, so the first two
    segments are ``data`` and ``courts``.  Separators are normalised because
    ``os.walk`` joins sub-directories with ``os.sep``, and empty components
    (for example from a trailing slash on the jurisdiction argument) are
    dropped so they cannot leak into the generated id.
    """
    normalized = dirpath.replace(os.sep, "/")
    return [part for part in normalized.split("/")[2:] if part]


def render_index(lines, id_line):
    """Return the file text with *id_line* inserted after the first line."""
    return "\n".join(lines[:1] + [id_line] + lines[1:]) + "\n"


def main(argv=None):
    """Regenerate the id option in every index.txt under a jurisdiction.

    Walks ``data/courts/<jurisdiction>`` (relative to the current working
    directory), and for each directory rewrites its ``index.txt`` so that
    the line after the opening directive carries a freshly built
    ``:category-id:`` or ``:court-id:`` option.  Category ids are the
    directory components joined with ``:``; court ids are produced by
    ``utils.joinUrn(components, True)`` (defined in ``LRR.utils``).  One
    ``.`` is written to stdout per processed file.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            jurisdiction directory name.

    Returns:
        Process exit status: 0 on success, 1 when a directory has no
        ``index.txt`` ("Oops") or its first content line is not a recognised
        directive ("Ouch"), 2 when the jurisdiction argument is missing.
        Processing stops at the first such error; files already rewritten
        stay rewritten.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Previously this surfaced as a bare IndexError traceback.
        sys.stderr.write("Usage: normalize-ids.py <jurisdiction>\n")
        return 2
    jurisdiction = argv[1]
    utils = Utils()
    for dirpath, _dirnames, _filenames in os.walk("data/courts/%s" % jurisdiction):
        index_path = os.path.join(dirpath, "index.txt")
        if not os.path.exists(index_path):
            # Single-argument print() emits the same text on Python 2 and 3.
            print("Oops: %s" % index_path)
            return 1
        with open(index_path) as source:
            lines, template, is_court = scan_index_lines(source)
        if not template:
            print("Ouch: %s" % index_path)
            return 1
        parts = id_components(dirpath)
        if is_court:
            identifier = utils.joinUrn(parts, True)
        else:
            identifier = ":".join(parts)
        text = render_index(lines, template % identifier)
        # Progress indicator, flushed so it shows up while the walk runs.
        sys.stdout.write(".")
        sys.stdout.flush()
        # The context manager guarantees the rewritten file is flushed and
        # closed before the next directory is processed.
        with open(index_path, "w") as target:
            target.write(text)
    return 0


if __name__ == "__main__":
    sys.exit(main())