This repository has been archived by the owner on Apr 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathamendments.py
77 lines (64 loc) · 2.67 KB
/
amendments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import functools
from copy import deepcopy
from lxml import etree
from interpparser import gpo_cfr
from regparser.notice.amendments.utils import label_amdpar_from
from regparser.notice.util import spaces_then_remove
from regparser.tree.struct import Node
def content_for_interpretations(instruction_xml):
    """Pair the XML chunk holding an interpretation amendment with a parser.

    The label parts and AMDPAR are obtained from label_amdpar_from. When
    there is more than one label part and the second one contains
    'Interpretations', return a 2-tuple of:

    * the AMDPAR's parent element (which serves as a unique key), and
    * a zero-argument thing (a functools.partial of parse_interp, bound to
      the first label part and that parent element) for parsing that XML as
      an interpretation.

    Otherwise return None."""
    label_parts, amdpar = label_amdpar_from(instruction_xml)

    targets_interp = (
        len(label_parts) > 1 and 'Interpretations' in label_parts[1])
    if not targets_interp:
        return None

    parent = amdpar.getparent()
    parser = functools.partial(parse_interp, label_parts[0], parent)
    return parent, parser
def parse_interp(cfr_part, xml):
    """Figure out which parts of the XML are relevant to interpretations,
    pass those on to gpo_cfr.parse_from_xml and return the results.

    The XML is first normalized with standardize_interp_xml. Its top-level
    children are then scanned in order: everything up to and including the
    first child that is (or contains) an HD mentioning 'Supplement I' is
    skipped, and every child after it is collected for parsing.

    :param cfr_part: first component of the root interpretation node's label
    :param xml: element whose children hold the interpretation content
    :return: the root Node produced by gpo_cfr.parse_from_xml, or None if
        that node has no children
    """
    parent_xml = standardize_interp_xml(xml)

    def contains_supp(n):
        """Does this element's own text mention 'Supplement I'?"""
        return 'supplement i' in (n.text or '').lower()

    # Skip over everything until 'Supplement I' in a header
    seen_header = False
    xml_nodes = []

    for child in parent_xml:
        if not seen_header:
            # The header may be this child itself or nested anywhere below
            # it. Either way, collection only begins with the next child.
            is_supp_header = child.tag == 'HD' and contains_supp(child)
            has_supp_header = any(
                contains_supp(hd) for hd in child.xpath(".//HD"))
            seen_header = is_supp_header or has_supp_header
        elif child.tag == 'SECTION':
            # SECTION shouldn't be in this part of the XML, but often is.
            # Expand it to proceed: swap SECTNO + SUBJECT for a single HD2
            # header and collect the SECTION's children directly.
            # NOTE: a SECTION lacking SECTNO or SUBJECT raises IndexError.
            sectno = child.xpath('./SECTNO')[0]
            subject = child.xpath('./SUBJECT')[0]
            header = etree.Element("HD", SOURCE="HD2")
            # .text is None for an empty element; treat that as ''
            header.text = (sectno.text or '') + '—' + (subject.text or '')
            child.insert(child.index(sectno), header)
            child.remove(sectno)
            child.remove(subject)
            # list(element) is the supported replacement for the deprecated
            # getchildren()
            xml_nodes.extend(list(child))
        else:
            xml_nodes.append(child)

    root = Node(label=[cfr_part, Node.INTERP_MARK], node_type=Node.INTERP)
    root = gpo_cfr.parse_from_xml(root, xml_nodes)
    return root if root.children else None
def standardize_interp_xml(xml):
    """Flatten wrapper elements so interpretation content sits a level up.

    Later parsing assumes a format of Supplement I header followed by HDs,
    STARS, and Ps. To get there, a deep copy of the input is passed through
    spaces_then_remove for 'PRTPAGE', and then every EXTRACT, APPENDIX and
    SUBPART descendant is replaced, at its own position, by its children.
    The resulting tree is returned."""
    flattened = spaces_then_remove(deepcopy(xml), 'PRTPAGE')
    wrappers = flattened.xpath(".//EXTRACT|.//APPENDIX|.//SUBPART")
    for wrapper in wrappers:
        container = wrapper.getparent()
        first_slot = container.index(wrapper)
        # Place each child just ahead of the wrapper, keeping their order
        for slot, inner in enumerate(wrapper, first_slot):
            container.insert(slot, inner)
        container.remove(wrapper)
    return flattened