Skip to content
This repository has been archived by the owner on Apr 4, 2023. It is now read-only.

Commit

Permalink
Merge pull request #344 from cmc333333/separate-interp-3
Browse files Browse the repository at this point in the history
Separate interpretations part 3
  • Loading branch information
cmc333333 authored Jan 8, 2017
2 parents 062f5a3 + a079e2c commit 21c2ed3
Show file tree
Hide file tree
Showing 33 changed files with 5,458 additions and 1,277 deletions.
2 changes: 1 addition & 1 deletion interpparser/preprocessors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from regparser.tree.xml_parser.interpretations import get_app_title
from regparser.tree.gpo_cfr.interpretations import get_app_title


_CONTAINS_SUPPLEMENT = "contains(., 'Supplement I')"
Expand Down
4 changes: 2 additions & 2 deletions regparser/commands/annual_editions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from regparser.history import annual
from regparser.index import dependency, entry
from regparser.tree import xml_parser
from regparser.tree import gpo_cfr


LastVersionInYear = namedtuple('LastVersionInYear', ['version_id', 'year'])
Expand Down Expand Up @@ -50,7 +50,7 @@ def process_if_needed(cfr_title, cfr_part, last_version_list):
deps.validate_for(tree_entry)
if deps.is_stale(tree_entry):
input_entry = annual_path / last_version.year
tree = xml_parser.reg_text.build_tree(input_entry.read().xml)
tree = gpo_cfr.builder.build_tree(input_entry.read().xml)
tree_entry.write(tree)


Expand Down
4 changes: 2 additions & 2 deletions regparser/commands/current_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from regparser.history.versions import Version
from regparser.index import dependency, entry
from regparser.notice.fake import build as build_fake_notice
from regparser.tree import xml_parser
from regparser.tree.gpo_cfr import builder


_version_id = '{}-annual-{}'.format
Expand All @@ -26,7 +26,7 @@ def process_if_needed(volume, cfr_part):
deps.add(tree_entry, annual_entry)
deps.validate_for(tree_entry)
if deps.is_stale(tree_entry):
tree = xml_parser.reg_text.build_tree(annual_entry.read().xml)
tree = builder.build_tree(annual_entry.read().xml)
tree_entry.write(tree)
notice_entry.write(build_fake_notice(
version_id, volume.publication_date, volume.title, cfr_part))
Expand Down
8 changes: 4 additions & 4 deletions regparser/notice/amendments.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from regparser.notice import changes, util
from regparser.notice.amdparser import amendment_from_xml
from regparser.tree.struct import Node, walk
from regparser.tree.xml_parser import interpretations
from regparser.tree.xml_parser.appendices import process_appendix
from regparser.tree.xml_parser.reg_text import (
build_from_section, build_subpart)
from regparser.tree.gpo_cfr import interpretations
from regparser.tree.gpo_cfr.appendices import process_appendix
from regparser.tree.gpo_cfr.section import build_from_section
from regparser.tree.gpo_cfr.subpart import build_subpart


logger = logging.getLogger(__name__)
Expand Down
4 changes: 2 additions & 2 deletions regparser/notice/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from regparser.grammar.tokens import Verb
from regparser.tree.struct import Node, find, find_parent
from regparser.tree.xml_parser import interpretations, reg_text
from regparser.tree.gpo_cfr import interpretations, section


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -91,7 +91,7 @@ def overwrite_marker(origin, new_label):
new one (new_label). This is necessary during node moves. """

if origin.node_type == Node.REGTEXT:
marker_list = reg_text.initial_markers(origin.text)
marker_list = section.initial_markers(origin.text)
if len(marker_list) > 0:
marker = '(%s)' % marker_list[0]
new_marker = '(%s)' % new_label
Expand Down
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
import logging
import re

from lxml import etree
from pyparsing import LineStart, Optional, Suppress
import six

from regparser.citations import internal_citations
from regparser.grammar import appendix as grammar
Expand All @@ -18,8 +16,7 @@
from regparser.tree.depth.derive import derive_depths
from regparser.tree.paragraph import p_levels
from regparser.tree.struct import Node
from regparser.tree.xml_parser import tree_utils
from regparser.tree.xml_parser.interpretations import build_supplement_tree
from regparser.tree.xml_parser import matchers, tree_utils

from settings import APPENDIX_IGNORE_SUBHEADER_LABEL

Expand Down Expand Up @@ -337,6 +334,11 @@ def process_appendix(appendix, part):
return AppendixProcessor(part).process(appendix)


@matchers.match_tag('APPENDIX')
def parse_appendix(parent, xml_node):
    """Matcher callback registered for ``APPENDIX`` tags.

    Runs ``process_appendix`` on ``xml_node`` (using ``parent.cfr_part`` as
    the part) and appends whatever it returns to ``parent.children``.
    Nothing is returned; ``parent`` is modified in place.
    """
    siblings = parent.children
    siblings.append(process_appendix(xml_node, parent.cfr_part))


def parsed_title(text, appendix_letter):
digit_str_parser = (Marker(appendix_letter) +
Suppress('-') +
Expand Down Expand Up @@ -392,20 +394,3 @@ def initial_marker(text):
match.period_lower or match.period_digit)
if len(marker) < 3 or all(char in 'ivxlcdm' for char in marker):
return marker, text[:end]


def build_non_reg_text(reg_xml, reg_part):
    """Build the nodes for the non-regulation text (Appendices and the
    Supplement section).

    ``reg_xml`` may be either an XML string, which is parsed here, or an
    already-parsed element, which is used as is. The returned list holds one
    entry per ``APPENDIX`` under a ``PART`` followed by one entry per
    ``INTERP`` under a ``PART``."""
    doc_root = (etree.fromstring(reg_xml)
                if isinstance(reg_xml, six.string_types)
                else reg_xml)

    # Appendices are built first so that they precede the supplement(s)
    appendices = [process_appendix(appendix, reg_part)
                  for appendix in doc_root.xpath('//PART//APPENDIX')]
    supplements = [build_supplement_tree(reg_part, interp)
                   for interp in doc_root.xpath('//PART//INTERP')]

    return appendices + supplements
82 changes: 82 additions & 0 deletions regparser/tree/gpo_cfr/builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# -*- coding: utf-8 -*-
import logging

from lxml import etree

from regparser import content, plugins
from regparser.tree.struct import Node


logger = logging.getLogger(__name__)


def get_reg_part(reg_doc):
    """
    Depending on source, the CFR part number exists in different places. Fetch
    it, wherever it is.

    Sources are consulted in this order and the first non-blank candidate
    wins: the ``PART`` attribute of ``REGTEXT`` (FR notice), then the text of
    ``PART/EAR``, ``FDSYS/HEADING`` and ``FDSYS/GRANULENUM`` (e-CFR XML).
    Elements without text and ``REGTEXT`` elements without a ``PART``
    attribute are skipped rather than raising.

    :param reg_doc: parsed XML exposing an ``xpath()`` method
    :returns: the part number as a string, or None if no source provides one
    """
    def _texts(path):
        # An empty element (e.g. <EAR/>) has ``text`` set to None; skip it
        # so the string operations below never see a non-string
        return [node.text for node in reg_doc.xpath(path)
                if node.text is not None]

    potential_parts = [
        # FR notice; ``get`` yields None when the attribute is absent
        node.attrib.get('PART') for node in reg_doc.xpath('//REGTEXT')]
    potential_parts.extend(
        # e-CFR XML, under PART/EAR
        text.replace('Pt.', '').strip()
        for text in _texts('//PART/EAR')
        if 'Pt.' in text)
    potential_parts.extend(
        # e-CFR XML, under FDSYS/HEADING
        text.replace('PART', '').strip()
        for text in _texts('//FDSYS/HEADING')
        if 'PART' in text)
    potential_parts.extend(
        # e-CFR XML, under FDSYS/GRANULENUM
        text.strip() for text in _texts('//FDSYS/GRANULENUM'))

    for part in potential_parts:
        # Ignore missing attributes and whitespace-only values
        if part and part.strip():
            return part
    return None


def get_title(reg_doc):
    """ Extract the title of the regulation: the text of the first ``HD``
    element found directly under a ``PART``. Indexing the first match means
    a document with no such element raises rather than returning None. """
    return reg_doc.xpath('//PART/HD')[0].text


def preprocess_xml(xml):
    """This transforms the read XML through macros. Each macro consists of
    an xpath and a replacement xml string.

    The tree is modified in place and nothing is returned. Every node matched
    by a macro's xpath is removed from its parent and the top-level elements
    of that macro's replacement are inserted, in order, where it used to be.
    Each matched node receives its own copy of the replacement."""
    # Function-scope import so this block does not depend on the module's
    # import list
    from copy import deepcopy

    logger.info("Preprocessing XML %s", xml)
    for path, replacement in content.Macros():
        # Wrap in a throwaway root so the replacement string may contain any
        # number of sibling elements
        template = etree.fromstring('<ROOT>' + replacement + '</ROOT>')
        for node in xml.xpath(path):
            parent = node.getparent()
            idx = parent.index(node)
            parent.remove(node)
            # lxml *moves* an element when it is inserted under a new
            # parent. Inserting the template's own children would empty it
            # after the first match, leaving later matches deleted with
            # nothing in their place -- so insert copies instead.
            for offset, repl in enumerate(template):
                parent.insert(idx + offset, deepcopy(repl))


def build_tree(reg_xml):
    """Build the root Node for a regulation from its parsed XML.

    The XML is first run through ``preprocess_xml`` (which mutates it). The
    root node is labelled with the CFR part and titled via ``get_title``.
    Each direct child of the first ``PART`` element is then offered to every
    plugin registered under ``eregs_ns.parser.xml_matchers.gpo_cfr.PART``;
    each plugin whose ``matches`` check passes is called with the root node
    and that child."""
    logger.info("Build tree %s", reg_xml)
    preprocess_xml(reg_xml)

    root = Node("", [], [get_reg_part(reg_xml)], get_title(reg_xml))

    part_xml = reg_xml.xpath('//PART')[0]
    # Materialize the plugins once; they are re-used for every child element
    part_matchers = list(plugins.instantiate_if_possible(
        'eregs_ns.parser.xml_matchers.gpo_cfr.PART'))

    for child_xml in part_xml.getchildren():
        for matcher in part_matchers:
            if not matcher.matches(root, child_xml):
                continue
            matcher(root, child_xml)

    return root
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from regparser.tree.depth.derive import derive_depths
from regparser.tree.interpretation import merge_labels, text_to_labels
from regparser.tree.struct import Node, treeify
from regparser.tree.xml_parser import tree_utils
from regparser.tree.xml_parser import matchers, tree_utils


logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -299,6 +299,11 @@ def build_supplement_tree(reg_part, node):
return parse_from_xml(root, node.getchildren())


@matchers.match_tag('INTERP')
def parse_interp(parent, xml_node):
    """Matcher callback registered for ``INTERP`` tags.

    Builds the supplement tree for ``xml_node`` (using ``parent.cfr_part``
    as the part) and appends the result to ``parent.children``. Nothing is
    returned; ``parent`` is modified in place.
    """
    siblings = parent.children
    siblings.append(build_supplement_tree(parent.cfr_part, xml_node))


def get_app_title(node):
""" Appendix/Supplement sections have the title in an HD tag, or
if they are reserved, in a <RESERVED> tag. Extract the title. """
Expand Down
Loading

0 comments on commit 21c2ed3

Please sign in to comment.