This repository has been archived by the owner on Apr 4, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #344 from cmc333333/separate-interp-3
Separate interpretations part 3
- Loading branch information
Showing
33 changed files
with
5,458 additions
and
1,277 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# -*- coding: utf-8 -*- | ||
import logging | ||
|
||
from lxml import etree | ||
|
||
from regparser import content, plugins | ||
from regparser.tree.struct import Node | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def get_reg_part(reg_doc):
    """
    Depending on source, the CFR part number exists in different places. Fetch
    it, wherever it is.

    Candidates are gathered in the following order and the first one that is
    not blank wins:

    1. the ``PART`` attribute of any ``REGTEXT`` element (FR notice)
    2. the text of ``PART/EAR`` elements containing ``Pt.`` (e-CFR XML)
    3. the text of ``FDSYS/HEADING`` elements containing ``PART`` (e-CFR XML)
    4. the text of ``FDSYS/GRANULENUM`` elements (e-CFR XML)

    :param reg_doc: a document object exposing an ``xpath(path)`` method
        which returns nodes with ``attrib`` and ``text`` attributes
        (presumably an lxml element/tree -- confirm against callers).
    :return: the part number as a string, or ``None`` when no source yields a
        non-blank value.
    """
    def texts(path):
        # An element without text reports ``None``; normalise that to ''
        # so the substring checks and strip() calls below cannot raise.
        return [node.text or '' for node in reg_doc.xpath(path)]

    potential_parts = []
    potential_parts.extend(
        # FR notice; a REGTEXT lacking the PART attribute is skipped by the
        # blank filter below instead of raising KeyError
        node.attrib.get('PART') or ''
        for node in reg_doc.xpath('//REGTEXT'))
    potential_parts.extend(
        # e-CFR XML, under PART/EAR
        text.replace('Pt.', '').strip()
        for text in texts('//PART/EAR')
        if 'Pt.' in text)
    potential_parts.extend(
        # e-CFR XML, under FDSYS/HEADING
        text.replace('PART', '').strip()
        for text in texts('//FDSYS/HEADING')
        if 'PART' in text)
    potential_parts.extend(
        # e-CFR XML, under FDSYS/GRANULENUM
        text.strip() for text in texts('//FDSYS/GRANULENUM'))

    # Drop blank candidates so an empty earlier source cannot shadow a later
    # one that actually carries the part number.
    potential_parts = [p for p in potential_parts if p.strip()]

    if potential_parts:
        return potential_parts[0]
    return None
|
||
|
||
def get_title(reg_doc):
    """Extract the title of the regulation.

    The title is the text of the first ``HD`` element found directly under a
    ``PART`` element. An ``IndexError`` propagates when the document has no
    such element.
    """
    headers = reg_doc.xpath('//PART/HD')
    return headers[0].text
|
||
|
||
def preprocess_xml(xml):
    """This transforms the read XML through macros. Each macro consists of
    an xpath and a replacement xml string.

    Every node matched by a macro's xpath is removed from its parent and the
    elements parsed from the replacement string are inserted at the position
    the node occupied, preserving their order. ``xml`` is modified in place;
    nothing is returned.

    :param xml: the parsed XML to transform; must provide ``xpath`` and its
        nodes must provide ``getparent`` (presumably an lxml element).
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    from copy import deepcopy

    logger.info("Preprocessing XML %s", xml)
    for path, replacement in content.Macros():
        # Wrap in a throwaway root so the replacement string may hold any
        # number of sibling elements.
        template = etree.fromstring('<ROOT>' + replacement + '</ROOT>')
        for node in xml.xpath(path):
            parent = node.getparent()
            idx = parent.index(node)
            parent.remove(node)
            for offset, repl in enumerate(template):
                # An lxml element has a single parent, so inserting ``repl``
                # itself would move it out of the template and leave nothing
                # for the next matched node. Insert a fresh copy per match.
                parent.insert(idx + offset, deepcopy(repl))
|
||
|
||
def build_tree(reg_xml):
    """Build the regulation tree for a parsed regulation XML document.

    The XML is first run through :func:`preprocess_xml` (which modifies it in
    place). A root ``Node`` is then created from the part number and title,
    and each child element of the first ``PART`` element is offered to every
    plugin registered under ``eregs_ns.parser.xml_matchers.gpo_cfr.PART``;
    every plugin whose ``matches`` accepts the element is called with the
    root node and that element.

    :param reg_xml: the parsed regulation XML (presumably an lxml element).
    :return: the root ``Node``, as left by the matching plugins.
    :raises IndexError: when the document contains no ``PART`` element (or
        no ``PART/HD`` element, via :func:`get_title`).
    """
    logger.info("Build tree %s", reg_xml)
    preprocess_xml(reg_xml)

    reg_part = get_reg_part(reg_xml)
    title = get_title(reg_xml)

    # NOTE(review): positional arguments appear to be text, children, label
    # and title -- confirm against regparser.tree.struct.Node.
    tree = Node("", [], [reg_part], title)

    part = reg_xml.xpath('//PART')[0]
    matchers = list(plugins.instantiate_if_possible(
        'eregs_ns.parser.xml_matchers.gpo_cfr.PART'))

    # ``getchildren()`` is deprecated in lxml; ``list(part)`` yields the same
    # snapshot of children, so any plugin that edits the XML cannot disturb
    # the iteration.
    for xml_node in list(part):
        for plugin in matchers:
            if plugin.matches(tree, xml_node):
                plugin(tree, xml_node)

    return tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.