diff --git a/interpparser/amendments.py b/interpparser/amendments.py new file mode 100644 index 00000000..18d7f287 --- /dev/null +++ b/interpparser/amendments.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import functools +from copy import deepcopy + +from lxml import etree + +from interpparser import gpo_cfr +from regparser.notice.amendments.utils import label_amdpar_from +from regparser.notice.util import spaces_then_remove +from regparser.tree.struct import Node + + +def content_for_interpretations(instruction_xml): + """Return a chunk of XML (which serves as a unique key) and a thunk for + parsing that XML as an interpretation""" + label_parts, amdpar = label_amdpar_from(instruction_xml) + if len(label_parts) > 0 and 'Interpretations' in label_parts[1]: + xml = amdpar.getparent() + return xml, functools.partial(parse_interp, label_parts[0], xml) + + +def parse_interp(cfr_part, xml): + """Figure out which parts of the parent_xml are relevant to + interpretations. Pass those on to gpo_cfr.parse_from_xml and + return the results""" + parent_xml = standardize_interp_xml(xml) + + # Skip over everything until 'Supplement I' in a header + seen_header = False + xml_nodes = [] + + def contains_supp(n): + text = (n.text or '').lower() + return 'supplement i' in text + + for child in parent_xml: + # SECTION shouldn't be in this part of the XML, but often is. 
Expand + # it to proceed + if seen_header and child.tag == 'SECTION': + sectno = child.xpath('./SECTNO')[0] + subject = child.xpath('./SUBJECT')[0] + header = etree.Element("HD", SOURCE="HD2") + header.text = sectno.text + '—' + subject.text + child.insert(child.index(sectno), header) + child.remove(sectno) + child.remove(subject) + xml_nodes.extend(child.getchildren()) + elif seen_header: + xml_nodes.append(child) + else: + if child.tag == 'HD' and contains_supp(child): + seen_header = True + if any(contains_supp(c) for c in child.xpath(".//HD")): + seen_header = True + + root = Node(label=[cfr_part, Node.INTERP_MARK], node_type=Node.INTERP) + root = gpo_cfr.parse_from_xml(root, xml_nodes) + if not root.children: + return None + else: + return root + + +def standardize_interp_xml(xml): + """We will assume a format of Supplement I header followed by HDs, + STARS, and Ps, so move anything in an EXTRACT up a level""" + xml = spaces_then_remove(deepcopy(xml), 'PRTPAGE') + for extract in xml.xpath(".//EXTRACT|.//APPENDIX|.//SUBPART"): + ex_parent = extract.getparent() + idx = ex_parent.index(extract) + for child in extract: + ex_parent.insert(idx, child) + idx += 1 + ex_parent.remove(extract) + return xml diff --git a/regparser/tree/gpo_cfr/interpretations.py b/interpparser/gpo_cfr.py similarity index 99% rename from regparser/tree/gpo_cfr/interpretations.py rename to interpparser/gpo_cfr.py index ba85e79c..26056b53 100644 --- a/regparser/tree/gpo_cfr/interpretations.py +++ b/interpparser/gpo_cfr.py @@ -5,12 +5,12 @@ import logging import re +from interpparser.tree import merge_labels, text_to_labels from regparser.citations import Label, remove_citation_overlaps from regparser.layer.key_terms import KeyTerms from regparser.tree.depth import markers as mtypes from regparser.tree.depth import heuristics, rules from regparser.tree.depth.derive import derive_depths -from regparser.tree.interpretation import merge_labels, text_to_labels from regparser.tree.struct import 
Node, treeify from regparser.tree.xml_parser import matchers, tree_utils diff --git a/interpparser/layers.py b/interpparser/layers.py index 6d57fe3f..49ef0737 100644 --- a/interpparser/layers.py +++ b/interpparser/layers.py @@ -1,9 +1,9 @@ from collections import defaultdict +from interpparser.tree import text_to_labels from regparser.citations import Label from regparser.layer.layer import Layer from regparser.tree import struct -from regparser.tree.interpretation import text_to_labels class Interpretations(Layer): diff --git a/interpparser/preprocessors.py b/interpparser/preprocessors.py index cd118abe..aaa2089d 100644 --- a/interpparser/preprocessors.py +++ b/interpparser/preprocessors.py @@ -1,4 +1,4 @@ -from regparser.tree.gpo_cfr.interpretations import get_app_title +from interpparser.gpo_cfr import get_app_title _CONTAINS_SUPPLEMENT = "contains(., 'Supplement I')" _SUPPLEMENT_HD = "//REGTEXT//HD[@SOURCE='HD1' and {0}]".format( diff --git a/interpparser/setup.py b/interpparser/setup.py index 0e72b634..18bbb7d3 100644 --- a/interpparser/setup.py +++ b/interpparser/setup.py @@ -9,12 +9,18 @@ 'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication' ], entry_points={ + 'eregs_ns.parser.amendment.content': + ('interpretations = ' + 'interpparser.amendments:content_for_interpretations'), 'eregs_ns.parser.layer.cfr': 'interpretations = interpparser.layers:Interpretations', 'eregs_ns.parser.preprocessors': [ 'supplement-amdpar = interpparser.preprocessors:supplement_amdpar', ('appendix-to-interp = interpparser.preprocessors:' 'appendix_to_interp'), - ], + ], + "eregs_ns.parser.xml_matchers.gpo_cfr.PART": [ + "interpretations = interpparser.gpo_cfr:parse_interp", + ] } ) diff --git a/regparser/tree/interpretation.py b/interpparser/tree.py similarity index 100% rename from regparser/tree/interpretation.py rename to interpparser/tree.py diff --git a/regparser/grammar/appendix.py b/regparser/grammar/appendix.py index 25f59247..a94f3da4 100644 --- 
a/regparser/grammar/appendix.py +++ b/regparser/grammar/appendix.py @@ -1,6 +1,8 @@ import string -from pyparsing import FollowedBy, Literal, Word +from pyparsing import FollowedBy, LineEnd, LineStart, Literal, SkipTo, Word + +from regparser.grammar import atomic, unified, utils def parenthesize(characters, name): @@ -30,3 +32,35 @@ def decimalize(characters, name): period_upper = decimalize(string.ascii_uppercase, "period_upper") period_lower = decimalize(string.ascii_lowercase, "period_lower") period_digit = decimalize(string.digits, "period_digit") + + +section = ( + atomic.section_marker.copy().leaveWhitespace() + + unified.part_section + + SkipTo(LineEnd()) +) + + +par = ( + atomic.section.copy().leaveWhitespace() + + unified.depth1_p + + SkipTo(LineEnd()) +) + + +marker_par = ( + atomic.paragraph_marker.copy().leaveWhitespace() + + atomic.section + + unified.depth1_p +) + + +appendix = ( + atomic.appendix_marker.copy().leaveWhitespace() + + atomic.appendix + + SkipTo(LineEnd()) +) + + +headers = utils.QuickSearchable( + LineStart() + (section | marker_par | par | appendix)) diff --git a/regparser/grammar/interpretation_headers.py b/regparser/grammar/interpretation_headers.py deleted file mode 100644 index f03a1c66..00000000 --- a/regparser/grammar/interpretation_headers.py +++ /dev/null @@ -1,34 +0,0 @@ -from pyparsing import LineEnd, LineStart, SkipTo - -from regparser.grammar import atomic, unified, utils - -section = ( - atomic.section_marker.copy().leaveWhitespace() + - unified.part_section + - SkipTo(LineEnd()) -) - - -par = ( - atomic.section.copy().leaveWhitespace() + - unified.depth1_p + - SkipTo(LineEnd()) -) - - -marker_par = ( - atomic.paragraph_marker.copy().leaveWhitespace() + - atomic.section + - unified.depth1_p -) - - -appendix = ( - atomic.appendix_marker.copy().leaveWhitespace() + - atomic.appendix + - SkipTo(LineEnd()) -) - - -parser = utils.QuickSearchable( - LineStart() + (section | marker_par | par | appendix)) diff --git 
a/regparser/notice/amendments.py b/regparser/notice/amendments.py deleted file mode 100644 index 2390c018..00000000 --- a/regparser/notice/amendments.py +++ /dev/null @@ -1,313 +0,0 @@ -# -*- coding: utf-8 -*- -import logging -from collections import namedtuple -from copy import deepcopy -from itertools import dropwhile - -from lxml import etree - -from regparser.notice import changes, util -from regparser.notice.amdparser import amendment_from_xml -from regparser.tree.gpo_cfr import interpretations -from regparser.tree.gpo_cfr.appendices import process_appendix -from regparser.tree.gpo_cfr.section import build_from_section -from regparser.tree.gpo_cfr.subpart import build_subpart -from regparser.tree.struct import Node, walk - -logger = logging.getLogger(__name__) -Content = namedtuple('Content', ['struct', 'amends']) - - -class ContentCache(object): - """As we can expect several amending instructions to refer to the same - section/appendix/etc., this object exists so that we only parse chunks of - relevant XML once.""" - def __init__(self): - self.by_xml = {} - - def fetch(self, key, fn, *args): - """Check the cache; if not present, run fn with the given args""" - if key is not None and key not in self.by_xml: - self.by_xml[key] = Content(fn(*args), []) - return self.by_xml.get(key) - - def content_of_change(self, instruction_xml): - """Instructions which create or modify a chunk of the CFR need to know - not only which paragraphs are being modified, but the _content_ of - those modifications. This method searches the XML around the - instruction and attempts to derive a related Node""" - is_editing = instruction_xml.tag in ('POST', 'PUT', 'INSERT', - 'RESERVE') - label = instruction_xml.get('label', '') - label_parts = label.split('-') - cfr_part = label_parts[0] - - # ... 
- amdpar = instruction_xml.getparent().getparent() - new_subpart = (instruction_xml.tag == 'POST' and - len(label_parts) == 2 and 'Subpart:' in label_parts[1]) - - if not is_editing: - return None - elif new_subpart: - xml = find_subpart(amdpar) - return self.fetch(xml, build_subpart, cfr_part, xml) - elif 'Appendix' in label: - xml = amdpar.getparent() - letter = label_parts[1][len('Appendix:'):] - return self.fetch(xml, parse_appendix, xml, cfr_part, letter) - elif 'Interpretations' in label: - xml = amdpar.getparent() - return self.fetch(xml, parse_interp, cfr_part, xml) - else: - xml = find_section(amdpar) - return self.fetch(xml, parse_regtext, xml, cfr_part) - - -def parse_regtext(xml, cfr_part): - """Small wrapper around build_from_section that returns only one section""" - sections = build_from_section(cfr_part, xml) - if sections: - return sections[0] - - -def parse_appendix(xml, cfr_part, letter): - """Attempt to parse an appendix. Used when the entire appendix has been - replaced/added or when we can use the section headers to determine our - place. If the format isn't what we expect, display a warning.""" - xml = deepcopy(xml) - hds = xml.xpath('//HD[contains(., "Appendix {0} to Part {1}")]'.format( - letter, cfr_part)) - if len(hds) == 0: - logger.warning("Could not find Appendix %s to part %s", - letter, cfr_part) - elif len(hds) > 1: - logger.warning("Too many headers for %s to part %s", - letter, cfr_part) - else: - hd = hds[0] - hd.set('SOURCE', 'HED') - extract = hd.getnext() - if extract is not None and extract.tag == 'EXTRACT': - extract.insert(0, hd) - for trailing in dropwhile(lambda n: n.tag != 'AMDPAR', - extract.getchildren()): - extract.remove(trailing) - return process_appendix(extract, cfr_part) - logger.warning("Bad format for whole appendix") - - -def parse_interp(cfr_part, parent_xml): - """Figure out which parts of the parent_xml are relevant to - interpretations. 
Pass those on to interpretations.parse_from_xml and - return the results""" - parent_xml = standardize_interp_xml(parent_xml) - - # Skip over everything until 'Supplement I' in a header - seen_header = False - xml_nodes = [] - - def contains_supp(n): - text = (n.text or '').lower() - return 'supplement i' in text - - for child in parent_xml: - # SECTION shouldn't be in this part of the XML, but often is. Expand - # it to proceed - if seen_header and child.tag == 'SECTION': - sectno = child.xpath('./SECTNO')[0] - subject = child.xpath('./SUBJECT')[0] - header = etree.Element("HD", SOURCE="HD2") - header.text = sectno.text + u'—' + subject.text - child.insert(child.index(sectno), header) - child.remove(sectno) - child.remove(subject) - xml_nodes.extend(child.getchildren()) - elif seen_header: - xml_nodes.append(child) - else: - if child.tag == 'HD' and contains_supp(child): - seen_header = True - if any(contains_supp(c) for c in child.xpath(".//HD")): - seen_header = True - - root = Node(label=[cfr_part, Node.INTERP_MARK], node_type=Node.INTERP) - root = interpretations.parse_from_xml(root, xml_nodes) - if not root.children: - return None - else: - return root - - -def standardize_interp_xml(xml): - """We will assume a format of Supplement I header followed by HDs, - STARS, and Ps, so move anything in an EXTRACT up a level""" - xml = util.spaces_then_remove(deepcopy(xml), 'PRTPAGE') - for extract in xml.xpath(".//EXTRACT|.//APPENDIX|.//SUBPART"): - ex_parent = extract.getparent() - idx = ex_parent.index(extract) - for child in extract: - ex_parent.insert(idx, child) - idx += 1 - ex_parent.remove(extract) - return xml - - -def fix_section_node(paragraphs, amdpar_xml): - """ When notices are corrected, the XML for notices doesn't follow the - normal syntax. Namely, pargraphs aren't inside section tags. We fix that - here, by finding the preceding section tag and appending paragraphs to it. 
- """ - - sections = [s for s in amdpar_xml.itersiblings(preceding=True) - if s.tag == 'SECTION'] - - # Let's only do this if we find one section tag. - if len(sections) == 1: - section = deepcopy(sections[0]) - for paragraph in paragraphs: - section.append(deepcopy(paragraph)) - return section - - -def find_lost_section(amdpar_xml): - """ This amdpar doesn't have any following siblings, so we - look in the next regtext """ - reg_text = amdpar_xml.getparent() - reg_text_siblings = [s for s in reg_text.itersiblings() - if s.tag == 'REGTEXT'] - if len(reg_text_siblings) > 0: - candidate_reg_text = reg_text_siblings[0] - amdpars = [a for a in candidate_reg_text if a.tag == 'AMDPAR'] - if len(amdpars) == 0: - # Only do this if there are not AMDPARS - for c in candidate_reg_text: - if c.tag == 'SECTION': - return c - - -def find_section(amdpar_xml): - """ With an AMDPAR xml, return the first section sibling """ - siblings = [s for s in amdpar_xml.itersiblings()] - - if len(siblings) == 0: - return find_lost_section(amdpar_xml) - - for sibling in siblings: - if sibling.tag == 'SECTION': - return sibling - - paragraphs = [s for s in siblings if s.tag == 'P'] - if len(paragraphs) > 0: - return fix_section_node(paragraphs, amdpar_xml) - - -def find_subpart(amdpar_tag): - """ Look amongst an amdpar tag's siblings to find a subpart. """ - for sibling in amdpar_tag.itersiblings(): - if sibling.tag == 'SUBPART': - return sibling - - -def fetch_amendments(notice_xml): - """Process changes to the regulation that are expressed in the notice.""" - notice_changes = changes.NoticeChanges() - - if notice_xml.xpath('.//AMDPAR[not(EREGS_INSTRUCTIONS)]'): - logger.warning( - 'No . 
Was this notice preprocessed?') - - cache = ContentCache() - authority_by_xml = {} - for instruction_xml in notice_xml.xpath('.//EREGS_INSTRUCTIONS/*'): - amendment = amendment_from_xml(instruction_xml) - content = cache.content_of_change(instruction_xml) - if instruction_xml.tag == 'MOVE_INTO_SUBPART': - subpart_changes = process_designate_subpart(amendment) - if subpart_changes: - notice_changes.add_changes(amendment.amdpar_xml, - subpart_changes) - elif instruction_xml.tag == 'AUTHORITY': - authority_by_xml[amendment.amdpar_xml] = instruction_xml.text - elif changes.new_subpart_added(amendment): - subpart_changes = {} - for change in changes.create_subpart_amendment(content.struct): - subpart_changes.update(change) - notice_changes.add_changes(amendment.amdpar_xml, subpart_changes) - elif content: - content.amends.append(amendment) - else: - create_xmlless_change(amendment, notice_changes) - - for content in cache.by_xml.values(): - create_xml_changes(content.amends, content.struct, notice_changes) - - amendments = [] - for amdpar_xml in notice_xml.xpath('.//AMDPAR'): - amendment_dict = {"instruction": amdpar_xml.text} - # There'll be at most one - for inst_xml in amdpar_xml.xpath('./EREGS_INSTRUCTIONS'): - context = inst_xml.get('final_context', '') - amendment_dict['cfr_part'] = context.split('-')[0] - relevant_changes = notice_changes.changes_by_xml[amdpar_xml] - if relevant_changes: - amendment_dict['changes'] = list(relevant_changes.items()) - if amdpar_xml in authority_by_xml: - amendment_dict['authority'] = authority_by_xml[amdpar_xml] - - amendments.append(amendment_dict) - - return amendments - - -def process_designate_subpart(amendment): - """ Process the designate amendment if it adds a subpart. 
""" - label_id = '-'.join(amendment.label) - return {label_id: {'action': 'DESIGNATE', - 'destination': amendment.destination}} - - -def create_xmlless_change(amendment, notice_changes): - """Deletes, moves, and the like do not have an associated XML structure. - Add their changes""" - amend_map = changes.match_labels_and_changes([amendment], None) - for label, amendments in amend_map.items(): - for amendment in amendments: - if amendment['action'] == 'DELETE': - notice_changes.add_changes( - amendment['amdpar_xml'], - {label: {'action': amendment['action']}}) - elif amendment['action'] == 'MOVE': - change = {'action': amendment['action']} - destination = [d for d in amendment['destination'] if d != '?'] - change['destination'] = destination - notice_changes.add_changes( - amendment['amdpar_xml'], {label: change}) - else: - logger.warning("Unknown action: %s", amendment['action']) - - -def create_xml_changes(amended_labels, section, notice_changes): - """For PUT/POST, match the amendments to the section nodes that got - parsed, and actually create the notice changes. 
""" - - def per_node(node): - node.child_labels = [c.label_id() for c in node.children] - walk(section, per_node) - - amend_map = changes.match_labels_and_changes(amended_labels, section) - - for label, amendments in amend_map.items(): - for amendment in amendments: - if amendment['action'] in ('POST', 'PUT', 'INSERT'): - if 'field' in amendment: - nodes = changes.create_field_amendment(label, amendment) - else: - nodes = changes.create_add_amendment(amendment) - for n in nodes: - notice_changes.add_changes(amendment['amdpar_xml'], n) - elif amendment['action'] == 'RESERVE': - change = changes.create_reserve_amendment(amendment) - notice_changes.add_changes(amendment['amdpar_xml'], change) - else: - logger.warning("Unknown action: %s", amendment['action']) diff --git a/regparser/notice/amendments/__init__.py b/regparser/notice/amendments/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/regparser/notice/amendments/appendix.py b/regparser/notice/amendments/appendix.py new file mode 100644 index 00000000..82e5a87c --- /dev/null +++ b/regparser/notice/amendments/appendix.py @@ -0,0 +1,46 @@ +import functools +import logging +from copy import deepcopy +from itertools import dropwhile + +from regparser.notice.amendments.utils import label_amdpar_from +from regparser.tree.gpo_cfr.appendices import process_appendix + +logger = logging.getLogger(__name__) + + +def content_for_appendix(instruction_xml): + """Return a chunk of XML (which serves as a unique key) and a thunk for + parsing that XML as an appendix""" + label_parts, amdpar = label_amdpar_from(instruction_xml) + if len(label_parts) > 0 and 'Appendix' in label_parts[1]: + xml = amdpar.getparent() + letter = label_parts[1][len('Appendix:'):] + return xml, functools.partial(parse_appendix, xml, label_parts[0], + letter) + + +def parse_appendix(xml, cfr_part, letter): + """Attempt to parse an appendix. 
Used when the entire appendix has been + replaced/added or when we can use the section headers to determine our + place. If the format isn't what we expect, display a warning.""" + xml = deepcopy(xml) + hds = xml.xpath('//HD[contains(., "Appendix {0} to Part {1}")]'.format( + letter, cfr_part)) + if len(hds) == 0: + logger.warning("Could not find Appendix %s to part %s", + letter, cfr_part) + elif len(hds) > 1: + logger.warning("Too many headers for %s to part %s", + letter, cfr_part) + else: + hd = hds[0] + hd.set('SOURCE', 'HED') + extract = hd.getnext() + if extract is not None and extract.tag == 'EXTRACT': + extract.insert(0, hd) + for trailing in dropwhile(lambda n: n.tag != 'AMDPAR', + extract.getchildren()): + extract.remove(trailing) + return process_appendix(extract, cfr_part) + logger.warning("Bad format for whole appendix") diff --git a/regparser/notice/amendments/fetch.py b/regparser/notice/amendments/fetch.py new file mode 100644 index 00000000..2fbef8c8 --- /dev/null +++ b/regparser/notice/amendments/fetch.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +import logging +from collections import namedtuple + +from stevedore.extension import ExtensionManager + +from regparser.notice import changes +from regparser.notice.amdparser import amendment_from_xml +from regparser.notice.amendments.subpart import process_designate_subpart +from regparser.tree.struct import walk + +logger = logging.getLogger(__name__) +Content = namedtuple('Content', ['struct', 'amends']) + + +class ContentCache(object): + """As we can expect several amending instructions to refer to the same + section/appendix/etc., this object exists so that we only parse chunks of + relevant XML once.""" + def __init__(self): + self.by_xml = {} + + def fetch(self, key, fn, *args): + """Check the cache; if not present, run fn with the given args""" + if key is not None and key not in self.by_xml: + self.by_xml[key] = Content(fn(*args), []) + return self.by_xml.get(key) + + def 
content_of_change(self, instruction_xml): + """Instructions which create or modify a chunk of the CFR need to know + not only which paragraphs are being modified, but the _content_ of + those modifications. This method searches the XML around the + instruction and attempts to derive a related Node""" + is_editing = instruction_xml.tag in ('POST', 'PUT', 'INSERT', + 'RESERVE') + + if not is_editing: + return None + + for extension in ExtensionManager('eregs_ns.parser.amendment.content'): + result = extension.plugin(instruction_xml) + if result: + key, fn = result + if key is not None and key not in self.by_xml: + self.by_xml[key] = Content(fn(), []) + return self.by_xml.get(key) + + +def fetch_amendments(notice_xml): + """Process changes to the regulation that are expressed in the notice.""" + notice_changes = changes.NoticeChanges() + + if notice_xml.xpath('.//AMDPAR[not(EREGS_INSTRUCTIONS)]'): + logger.warning( + 'No . Was this notice preprocessed?') + + cache = ContentCache() + authority_by_xml = {} + for instruction_xml in notice_xml.xpath('.//EREGS_INSTRUCTIONS/*'): + amendment = amendment_from_xml(instruction_xml) + content = cache.content_of_change(instruction_xml) + if instruction_xml.tag == 'MOVE_INTO_SUBPART': + subpart_changes = process_designate_subpart(amendment) + if subpart_changes: + notice_changes.add_changes(amendment.amdpar_xml, + subpart_changes) + elif instruction_xml.tag == 'AUTHORITY': + authority_by_xml[amendment.amdpar_xml] = instruction_xml.text + elif changes.new_subpart_added(amendment): + subpart_changes = {} + for change in changes.create_subpart_amendment(content.struct): + subpart_changes.update(change) + notice_changes.add_changes(amendment.amdpar_xml, subpart_changes) + elif content: + content.amends.append(amendment) + else: + create_xmlless_change(amendment, notice_changes) + + for content in cache.by_xml.values(): + create_xml_changes(content.amends, content.struct, notice_changes) + + amendments = [] + for amdpar_xml in 
notice_xml.xpath('.//AMDPAR'): + amendment_dict = {"instruction": amdpar_xml.text} + # There'll be at most one + for inst_xml in amdpar_xml.xpath('./EREGS_INSTRUCTIONS'): + context = inst_xml.get('final_context', '') + amendment_dict['cfr_part'] = context.split('-')[0] + relevant_changes = notice_changes.changes_by_xml[amdpar_xml] + if relevant_changes: + amendment_dict['changes'] = list(relevant_changes.items()) + if amdpar_xml in authority_by_xml: + amendment_dict['authority'] = authority_by_xml[amdpar_xml] + + amendments.append(amendment_dict) + + return amendments + + +def create_xmlless_change(amendment, notice_changes): + """Deletes, moves, and the like do not have an associated XML structure. + Add their changes""" + amend_map = changes.match_labels_and_changes([amendment], None) + for label, amendments in amend_map.items(): + for amendment in amendments: + if amendment['action'] == 'DELETE': + notice_changes.add_changes( + amendment['amdpar_xml'], + {label: {'action': amendment['action']}}) + elif amendment['action'] == 'MOVE': + change = {'action': amendment['action']} + destination = [d for d in amendment['destination'] if d != '?'] + change['destination'] = destination + notice_changes.add_changes( + amendment['amdpar_xml'], {label: change}) + else: + logger.warning("Unknown action: %s", amendment['action']) + + +def create_xml_changes(amended_labels, section, notice_changes): + """For PUT/POST, match the amendments to the section nodes that got + parsed, and actually create the notice changes. 
""" + + def per_node(node): + node.child_labels = [c.label_id() for c in node.children] + walk(section, per_node) + + amend_map = changes.match_labels_and_changes(amended_labels, section) + + for label, amendments in amend_map.items(): + for amendment in amendments: + if amendment['action'] in ('POST', 'PUT', 'INSERT'): + if 'field' in amendment: + nodes = changes.create_field_amendment(label, amendment) + else: + nodes = changes.create_add_amendment(amendment) + for n in nodes: + notice_changes.add_changes(amendment['amdpar_xml'], n) + elif amendment['action'] == 'RESERVE': + change = changes.create_reserve_amendment(amendment) + notice_changes.add_changes(amendment['amdpar_xml'], change) + else: + logger.warning("Unknown action: %s", amendment['action']) diff --git a/regparser/notice/amendments/section.py b/regparser/notice/amendments/section.py new file mode 100644 index 00000000..f91fafd6 --- /dev/null +++ b/regparser/notice/amendments/section.py @@ -0,0 +1,67 @@ +from copy import deepcopy + +from regparser.notice.amendments.utils import label_amdpar_from +from regparser.tree.gpo_cfr.section import build_from_section + + +def content_for_regtext(instruction_xml): + """Return a chunk of XML (which serves as a unique key) and a thunk for + parsing that XML as a section""" + label_parts, amdpar = label_amdpar_from(instruction_xml) + xml = find_section(amdpar) + + def parse_regtext(): + sections = build_from_section(label_parts[0], xml) + if sections: + return sections[0] + + return xml, parse_regtext + + +def find_section(amdpar_xml): + """ With an AMDPAR xml, return the first section sibling """ + siblings = [s for s in amdpar_xml.itersiblings()] + + if len(siblings) == 0: + return find_lost_section(amdpar_xml) + + for sibling in siblings: + if sibling.tag == 'SECTION': + return sibling + + paragraphs = [s for s in siblings if s.tag == 'P'] + if len(paragraphs) > 0: + return fix_section_node(paragraphs, amdpar_xml) + + +def find_lost_section(amdpar_xml): + """ 
This amdpar doesn't have any following siblings, so we + look in the next regtext """ + reg_text = amdpar_xml.getparent() + reg_text_siblings = [s for s in reg_text.itersiblings() + if s.tag == 'REGTEXT'] + if len(reg_text_siblings) > 0: + candidate_reg_text = reg_text_siblings[0] + amdpars = [a for a in candidate_reg_text if a.tag == 'AMDPAR'] + if len(amdpars) == 0: + # Only do this if there are not AMDPARS + for c in candidate_reg_text: + if c.tag == 'SECTION': + return c + + +def fix_section_node(paragraphs, amdpar_xml): + """ When notices are corrected, the XML for notices doesn't follow the + normal syntax. Namely, paragraphs aren't inside section tags. We fix that + here, by finding the preceding section tag and appending paragraphs to it. + """ + + sections = [s for s in amdpar_xml.itersiblings(preceding=True) + if s.tag == 'SECTION'] + + # Let's only do this if we find one section tag. + if len(sections) == 1: + section = deepcopy(sections[0]) + for paragraph in paragraphs: + section.append(deepcopy(paragraph)) + return section diff --git a/regparser/notice/amendments/subpart.py b/regparser/notice/amendments/subpart.py new file mode 100644 index 00000000..fe645eeb --- /dev/null +++ b/regparser/notice/amendments/subpart.py @@ -0,0 +1,28 @@ +import functools + +from regparser.notice.amendments.utils import label_amdpar_from +from regparser.tree.gpo_cfr.subpart import build_subpart + + +def content_for_new_subpart(instruction_xml): + """Return a chunk of XML (which serves as a unique key) and a thunk for + parsing that XML as a subpart""" + label_parts, amdpar = label_amdpar_from(instruction_xml) + if (instruction_xml.tag == 'POST' and len(label_parts) == 2 + and 'Subpart:' in label_parts[1]): + xml = find_subpart(amdpar) + return xml, functools.partial(build_subpart, label_parts[0], xml) + + +def find_subpart(amdpar_tag): + """ Look amongst an amdpar tag's siblings to find a subpart. 
""" + for sibling in amdpar_tag.itersiblings(): + if sibling.tag == 'SUBPART': + return sibling + + +def process_designate_subpart(amendment): + """ Process the designate amendment if it adds a subpart. """ + label_id = '-'.join(amendment.label) + return {label_id: {'action': 'DESIGNATE', + 'destination': amendment.destination}} diff --git a/regparser/notice/amendments/utils.py b/regparser/notice/amendments/utils.py new file mode 100644 index 00000000..5330cfa5 --- /dev/null +++ b/regparser/notice/amendments/utils.py @@ -0,0 +1,5 @@ +def label_amdpar_from(instruction_xml): + label_parts = instruction_xml.get('label', '').split('-') + # ... + amdpar = instruction_xml.getparent().getparent() + return label_parts, amdpar diff --git a/regparser/notice/build.py b/regparser/notice/build.py index 24ec8d60..0076ebb4 100644 --- a/regparser/notice/build.py +++ b/regparser/notice/build.py @@ -3,7 +3,7 @@ from lxml import etree from regparser.grammar.unified import notice_cfr_p -from regparser.notice.amendments import fetch_amendments +from regparser.notice.amendments.fetch import fetch_amendments from regparser.notice.dates import fetch_dates from regparser.notice.sxs import (build_section_by_section, find_section_by_section) diff --git a/regparser/notice/compiler.py b/regparser/notice/compiler.py index ddfd28be..e9b213b1 100644 --- a/regparser/notice/compiler.py +++ b/regparser/notice/compiler.py @@ -10,7 +10,7 @@ from roman import fromRoman from regparser.grammar.tokens import Verb -from regparser.tree.gpo_cfr import interpretations, section +from regparser.layer.paragraph_markers import marker_of from regparser.tree.struct import Node, find, find_parent logger = logging.getLogger(__name__) @@ -88,18 +88,13 @@ def node_text_equality(left, right): def overwrite_marker(origin, new_label): """ The node passed in has a label, but we're going to give it a new one (new_label). This is necessary during node moves. 
""" - - if origin.node_type == Node.REGTEXT: - marker_list = section.initial_markers(origin.text) - if len(marker_list) > 0: - marker = '({0})'.format(marker_list[0]) - new_marker = '({0})'.format(new_label) - origin.text = origin.text.replace(marker, new_marker, 1) - elif origin.node_type == Node.INTERP: - marker = interpretations.get_first_interp_marker(origin.text) - marker = marker + '.' - new_marker = new_label + '.' - origin.text = origin.text.replace(marker, new_marker, 1) + marker = marker_of(origin) + if '(' in marker: + origin.text = origin.text.replace(marker, '({0})'.format(new_label), 1) + elif marker: + origin.text = origin.text.replace(marker, '{0}.'.format(new_label), 1) + else: + logger.warning("Cannot replace marker in %s", origin.text) return origin diff --git a/regparser/notice/xml.py b/regparser/notice/xml.py index 3aafa4c5..795d1878 100644 --- a/regparser/notice/xml.py +++ b/regparser/notice/xml.py @@ -15,7 +15,7 @@ from regparser.grammar.unified import notice_cfr_p from regparser.history.delays import delays_in_sentence from regparser.index.http_cache import http_client -from regparser.notice.amendments import fetch_amendments +from regparser.notice.amendments.fetch import fetch_amendments from regparser.notice.dates import fetch_dates from regparser.tree.xml_parser.xml_wrapper import XMLWrapper diff --git a/regparser/tree/gpo_cfr/appendices.py b/regparser/tree/gpo_cfr/appendices.py index 9f3df1ce..c8871278 100644 --- a/regparser/tree/gpo_cfr/appendices.py +++ b/regparser/tree/gpo_cfr/appendices.py @@ -9,7 +9,6 @@ from regparser.citations import internal_citations from regparser.grammar import appendix as grammar -from regparser.grammar.interpretation_headers import parser as headers from regparser.grammar.utils import Marker, QuickSearchable from regparser.layer.formatting import table_xml_to_plaintext from regparser.layer.key_terms import KeyTerms @@ -83,7 +82,7 @@ def set_letter(self, appendix): if self.appendix_letter: 
logger.warning("Found two appendix headers: %s and %s", self.appendix_letter, text) - self.appendix_letter = headers.parseString(text).appendix + self.appendix_letter = grammar.headers.parseString(text).appendix return self.appendix_letter def hed(self, part, text): diff --git a/regparser/tree/gpo_cfr/subpart.py b/regparser/tree/gpo_cfr/subpart.py index fae1d924..64ded4bd 100644 --- a/regparser/tree/gpo_cfr/subpart.py +++ b/regparser/tree/gpo_cfr/subpart.py @@ -26,14 +26,13 @@ def build_subjgrp(reg_part, subjgrp_xml, letter_list): return subjgrp -def build_subpart(reg_part, subpart_xml): - subpart_title = get_subpart_group_title(subpart_xml) - subpart = reg_text.build_subpart(subpart_title, reg_part) +def build_subpart(cfr_part, xml): + subpart_title = get_subpart_group_title(xml) + subpart = reg_text.build_subpart(subpart_title, cfr_part) sections = [] - for ch in subpart_xml.getchildren(): - if ch.tag == 'SECTION': - sections.extend(build_from_section(reg_part, ch)) + for ch in xml.xpath('./SECTION'): + sections.extend(build_from_section(cfr_part, ch)) subpart.children = sections return subpart diff --git a/setup.py b/setup.py index a93fa9a4..962b76bd 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,14 @@ ], entry_points={ "console_scripts": "eregs=eregs:main", + "eregs_ns.parser.amendment.content": [ + ("new_subpart = regparser.notice.amendments.subpart:" + "content_for_new_subpart"), + ("regtext = regparser.notice.amendments.section:" + "content_for_regtext"), + ("appendix = regparser.notice.amendments.appendix:" + "content_for_appendix"), + ], "eregs_ns.parser.layer.cfr": [ "meta = regparser.layer.meta:Meta", ("internal-citations = regparser.layer.internal_citations:" @@ -87,7 +95,6 @@ "subpart = regparser.tree.gpo_cfr.subpart:parse_subpart", "subjgrp = regparser.tree.gpo_cfr.subpart:ParseSubjectGroup", "appendix = regparser.tree.gpo_cfr.appendices:parse_appendix", - "interp = regparser.tree.gpo_cfr.interpretations:parse_interp", ] } ) diff --git 
a/tests/grammar/__init__.py b/tests/grammar/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/grammar/appendix_tests.py b/tests/grammar/appendix_tests.py new file mode 100644 index 00000000..626506e1 --- /dev/null +++ b/tests/grammar/appendix_tests.py @@ -0,0 +1,34 @@ +from regparser.grammar import appendix + + +def test_par(): + match = appendix.headers.parseString("3(c)(4) Pandas") + assert match.section == '3' + assert match.p1 == 'c' + assert match.p2 == '4' + + +def test_section(): + match = appendix.headers.parseString("Section 105.11") + assert match.part == '105' + assert match.section == '11' + + +def test_newline(): + starts = [start for _, start, _ in + appendix.headers.scanString("\nSection 100.22")] + assert starts[0] == 1 + starts = [start for _, start, _ in + appendix.headers.scanString("\nParagraph 2(b)(2)")] + assert starts[0] == 1 + + +def test_marker_par(): + match = appendix.headers.parseString("Paragraph 3(b)") + assert match.section == '3' + assert match.p1 == 'b' + + +def test_appendix(): + match = appendix.headers.parseString("Appendix M - More Info") + assert match.appendix == 'M' diff --git a/tests/grammar_interpretation_headers_tests.py b/tests/grammar_interpretation_headers_tests.py deleted file mode 100644 index ee3fe9b5..00000000 --- a/tests/grammar_interpretation_headers_tests.py +++ /dev/null @@ -1,34 +0,0 @@ -from unittest import TestCase - -from regparser.grammar import interpretation_headers as ih_lib - - -class GrammarInterpretationHeadersTest(TestCase): - - def test_par(self): - match = ih_lib.parser.parseString("3(c)(4) Pandas") - self.assertEqual('3', match.section) - self.assertEqual('c', match.p1) - self.assertEqual('4', match.p2) - - def test_section(self): - match = ih_lib.parser.parseString("Section 105.11") - self.assertEqual('105', match.part) - self.assertEqual('11', match.section) - - def test_newline(self): - starts = [start for _, start, _ in - ih_lib.parser.scanString("\nSection 100.22")] 
- self.assertEqual(1, starts[0]) - starts = [start for _, start, _ in - ih_lib.parser.scanString("\nParagraph 2(b)(2)")] - self.assertEqual(1, starts[0]) - - def test_marker_par(self): - match = ih_lib.parser.parseString("Paragraph 3(b)") - self.assertEqual('3', match.section) - self.assertEqual('b', match.p1) - - def test_appendix(self): - match = ih_lib.parser.parseString("Appendix M - More Info") - self.assertEqual('M', match.appendix) diff --git a/tests/interpparser/amendments_tests.py b/tests/interpparser/amendments_tests.py new file mode 100644 index 00000000..285dac2d --- /dev/null +++ b/tests/interpparser/amendments_tests.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from mock import Mock + +from interpparser import amendments +from regparser.notice.amendments import fetch, section +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.xml_parser.preprocessors import preprocess_amdpars + + +def test_parse_interp(monkeypatch): + interp_lib = Mock() + monkeypatch.setattr(amendments, 'gpo_cfr', interp_lib) + xmls = [] + with XMLBuilder("REGTEXT") as ctx: + with ctx.EXTRACT(): + ctx.P("Something") + ctx.STARS() + ctx.HD("Supplement I") + ctx.HD("A") + ctx.T1("a") + ctx.P("b") + xmls.append(ctx.xml) + + with XMLBuilder("REGTEXT") as ctx: + ctx.P("Something") + ctx.STARS() + with ctx.SUBSECT(): + ctx.HD("Supplement I") + ctx.HD("A") + ctx.T1("a") + ctx.P("b") + xmls.append(ctx.xml) + + with XMLBuilder("REGTEXT") as ctx: + ctx.AMDPAR("1. In Supplement I to part 111, under...") + ctx.P("Something") + ctx.STARS() + ctx.HD("SUPPLEMENT I") + ctx.HD("A") + ctx.T1("a") + ctx.P("b") + xmls.append(ctx.xml) + + with XMLBuilder("REGTEXT") as ctx: + ctx.AMDPAR("1. 
In Supplement I to part 111, under...") + ctx.P("Something") + ctx.STARS() + with ctx.APPENDIX(): + ctx.HD("SUPPLEMENT I") + ctx.HD("A") + ctx.T1("a") + ctx.P("b") + ctx.PRTPAGE() + xmls.append(ctx.xml) + + for xml in xmls: + amendments.parse_interp('111', xml) + root, nodes = interp_lib.parse_from_xml.call_args[0] + assert root.label == ['111', 'Interp'] + assert [n.tag for n in nodes] == ['HD', 'T1', 'P'] + + +def test_parse_interp_subpart_confusion(): + with XMLBuilder("REGTEXT") as ctx: + ctx.AMDPAR("1. In Supplement I to part 111, under Section 33, " + "paragraph 5 is added.") + ctx.HD("Supplement I") + with ctx.SUBPART(): + with ctx.SECTION(): + ctx.SECTNO("§ 111.33") + ctx.SUBJECT("Stubby Subby") + ctx.STARS() + ctx.P("5. Some Content") + interp = amendments.parse_interp('111', ctx.xml) + assert len(interp.children) == 1 + c33 = interp.children[0] + assert c33.label == ['111', '33', 'Interp'] + assert len(c33.children) == 1 + c335 = c33.children[0] + assert c335.label == ['111', '33', 'Interp', '5'] + + +def test_process_amendments_restart_new_section(monkeypatch): + # turn on the interpretations plugin + monkeypatch.setattr(fetch, 'ExtensionManager', Mock(return_value=[ + Mock(plugin=amendments.content_for_interpretations), + Mock(plugin=section.content_for_regtext) + ])) + + amdpar1 = "1. In Supplement I to Part 104, comment 22(a) is added" + amdpar2 = "3. In § 105.1, revise paragraph (b) to read as follows:" + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(PART="104", TITLE="12"): + ctx.AMDPAR(amdpar1) + ctx.HD("SUPPLEMENT I", SOURCE='HED') + ctx.HD("22(a)", SOURCE='HD1') + ctx.P("1. 
Content") + with ctx.REGTEXT(PART="105", TITLE="12"): + ctx.AMDPAR(amdpar2) + with ctx.SECTION(): + ctx.SECTNO("§ 105.1") + ctx.SUBJECT("Purpose.") + ctx.STARS() + ctx.P("(b) This part carries out.") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '104' + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '105' + assert '104-22-a-Interp' in changes1 + assert '105-1-b' in changes2 + + assert changes1['104-22-a-Interp'][0]['action'] == 'POST' + assert changes2['105-1-b'][0]['action'] == 'PUT' diff --git a/tests/interpparser/gpo_cfr_tests.py b/tests/interpparser/gpo_cfr_tests.py new file mode 100644 index 00000000..89d3fb6b --- /dev/null +++ b/tests/interpparser/gpo_cfr_tests.py @@ -0,0 +1,416 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import pytest +from lxml import etree + +from interpparser import gpo_cfr +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.xml_parser import tree_utils + + +def test_interpretation_markers(): + text = '1. Kiwis and Mangos' + assert gpo_cfr.get_first_interp_marker(text) == '1' + + +def test_interpretation_markers_roman(): + text = 'iv. Kiwis and Mangos' + assert gpo_cfr.get_first_interp_marker(text) == 'iv' + + +def test_interpretation_markers_emph(): + text = '1. Kiwis and Mangos' + assert gpo_cfr.get_first_interp_marker(text) == '1' + + text = '1. Kiwis and Mangos. More content.' 
+ assert gpo_cfr.get_first_interp_marker(text) == '1' + + +def test_interpretation_markers_none(): + text = '(iv) Kiwis and Mangos' + assert gpo_cfr.get_first_interp_marker(text) is None + + +def test_interpretation_markers_stars_no_period(): + for marker in ('4 ', 'iv ', 'A\t'): + text = marker + '* * *' + assert gpo_cfr.get_first_interp_marker(text) == marker.strip() + + text = "33 * * * Some more stuff" + assert gpo_cfr.get_first_interp_marker(text) is None + + +def test_build_supplement_tree(): + """Integration test""" + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Section 737.5 NASCAR", SOURCE='HD2') + ctx.P("1. Paragraph 1") + ctx.P("i. Paragraph i; A. Start of A") + ctx.HD("5(a) Access Device", SOURCE='HD2') + ctx.P("1. Paragraph 111") + ctx.P("i. Content content") + ctx.P("ii. More content") + ctx.P("A. Aaaaah") + ctx.child_from_string('

1. More info

') + ctx.child_from_string('

2. Second info

') + ctx.child_from_string('

3. Keyterms

') + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 1 + + i5 = tree.children[0] + assert i5.label == ['737', '5', 'Interp'] + assert len(i5.children) == 2 + + i51, i5a = i5.children + assert i51.label == ['737', '5', 'Interp', '1'] + assert len(i51.children) == 1 + i51i = i51.children[0] + assert i51i.label == ['737', '5', 'Interp', '1', 'i'] + assert len(i51i.children) == 1 + i51ia = i51i.children[0] + assert i51ia.label == ['737', '5', 'Interp', '1', 'i', 'A'] + assert i51ia.children == [] + + assert i5a.label == ['737', '5', 'a', 'Interp'] + assert len(i5a.children) == 1 + i5a1 = i5a.children[0] + assert i5a1.label == ['737', '5', 'a', 'Interp', '1'] + assert len(i5a1.children) == 2 + i5a1i, i5a1ii = i5a1.children + assert i5a1i.label == ['737', '5', 'a', 'Interp', '1', 'i'] + assert i5a1i.children == [] + + assert i5a1ii.label == ['737', '5', 'a', 'Interp', '1', 'ii'] + assert len(i5a1ii.children) == 1 + i5a1iia = i5a1ii.children[0] + assert i5a1iia.label == ['737', '5', 'a', 'Interp', '1', 'ii', 'A'] + assert len(i5a1iia.children) == 3 + i5a1iia1, i5a1iia2, i5a1iia3 = i5a1iia.children + assert i5a1iia1.label == ['737', '5', 'a', 'Interp', '1', 'ii', 'A', '1'] + assert i5a1iia1.tagged_text == '1. More info' + assert i5a1iia1.children == [] + assert i5a1iia2.label == ['737', '5', 'a', 'Interp', '1', 'ii', 'A', '2'] + assert i5a1iia2.tagged_text == '2. Second info' + assert i5a1iia2.children == [] + assert i5a1iia3.label == ['737', '5', 'a', 'Interp', '1', 'ii', 'A', '3'] + assert i5a1iia3.tagged_text == '3. Keyterms' + assert i5a1iia3.children == [] + + +def test_build_supplement_tree_spacing(): + """Integration test""" + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Section 737.5 NASCAR", SOURCE='HD2') + ctx.child_from_string('

1.Phrase. More Content

') + ctx.child_from_string('

i. I likeice cream

') + ctx.P("A. Aaaaah") + ctx.child_from_string('

1.More info

') + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 1 + + s5 = tree.children[0] + assert len(s5.children) == 1 + + s51 = s5.children[0] + assert s51.text == "1. Phrase. More Content" + assert len(s51.children) == 1 + + s51i = s51.children[0] + assert s51i.text == "i. I like ice cream" + assert len(s51i.children) == 1 + + s51ia = s51i.children[0] + assert s51ia.text == "A. Aaaaah" + assert len(s51ia.children) == 1 + + s51ia1 = s51ia.children[0] + assert s51ia1.text == "1. More info" + assert s51ia1.children == [] + + +def test_build_supplement_tree_repeats(): + """Integration test""" + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Appendices G and H-Content", SOURCE='HD2') + ctx.P("1. G:H") + ctx.HD("Appendix G", SOURCE='HD2') + ctx.P("1. G") + ctx.HD("Appendix H", SOURCE='HD2') + ctx.P("1. H") + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 3 + aGH, aG, aH = tree.children + assert aGH.label == ['737', 'G_H', 'Interp'] + assert aG.label == ['737', 'G', 'Interp'] + assert aH.label == ['737', 'H', 'Interp'] + + +def test_build_supplement_tree_skip_levels(): + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Section 737.5 NASCAR", SOURCE='HD2') + ctx.HD("5(a)(1)(i) Access Device", SOURCE='HD2') + ctx.P("1. Paragraph 111") + ctx.HD("5(b) Other Devices", SOURCE='HD2') + ctx.P("1. 
Paragraph 222") + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 1 + + i5 = tree.children[0] + assert i5.label == ['737', '5', 'Interp'] + assert len(i5.children) == 2 + i5a, i5b = i5.children + + assert i5a.label == ['737', '5', 'a', 'Interp'] + assert len(i5a.children) == 1 + i5a1 = i5a.children[0] + + assert i5a1.label == ['737', '5', 'a', '1', 'Interp'] + assert len(i5a1.children) == 1 + i5a1i = i5a1.children[0] + + assert i5a1i.label == ['737', '5', 'a', '1', 'i', 'Interp'] + assert len(i5a1i.children) == 1 + + assert i5b.label == ['737', '5', 'b', 'Interp'] + assert len(i5b.children) == 1 + + +def test_build_supplement_tree_appendix_paragraphs(): + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Appendix H", SOURCE='HD2') + ctx.HD("(b) bbbbbbb", SOURCE='HD3') + ctx.P("1. Paragraph b") + ctx.HD("(b)(5) b5b5b5", SOURCE='HD3') + ctx.P("1. Paragraph b5") + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 1 + + ih = tree.children[0] + assert ih.label == ['737', 'H', 'Interp'] + assert len(ih.children) == 1 + + ihb = ih.children[0] + assert ihb.label == ['737', 'H', 'b', 'Interp'] + assert len(ihb.children) == 2 + + ihb1, ihb5 = ihb.children + assert ihb1.label == ['737', 'H', 'b', 'Interp', '1'] + assert ihb5.label == ['737', 'H', 'b', '5', 'Interp'] + + +def test_build_supplement_intro_section(): + """Integration test""" + with XMLBuilder('APPENDIX') as ctx: + ctx.HD("Supplement I to Part 737-Official Interpretations", + SOURCE='HED') + ctx.HD("Introduction", SOURCE='HD1') + ctx.P("1. Some content. (a) Badly named") + ctx.P("(b) Badly named") + ctx.HD("Subpart A", SOURCE='HD1') + ctx.HD("Section 737.13", SOURCE='HD2') + ctx.child_from_string("

13(a) Some Stuff!

") + ctx.P("1. 131313") + ctx.HD("Appendix G", SOURCE='HD2') + ctx.P("1. G") + tree = gpo_cfr.build_supplement_tree('737', ctx.xml) + assert tree.label == ['737', 'Interp'] + assert len(tree.children) == 3 + h1, s13, g = tree.children + + assert h1.label == ['737', 'Interp', 'h1'] + assert s13.label == ['737', '13', 'Interp'] + assert g.label == ['737', 'G', 'Interp'] + + assert len(h1.children) == 1 + assert h1.children[0].text == ('1. Some content. (a) Badly named\n\n' + '(b) Badly named') + assert h1.children[0].children == [] + + assert len(s13.children) == 1 + assert s13.children[0].title == '13(a) Some Stuff!' + + +def test_process_inner_child(): + with XMLBuilder('ROOT') as ctx: + ctx.HD("Title") + ctx.P("1. 111. i. iii") + ctx.STARS() + ctx.P("A. AAA") + ctx.child_from_string('

1. eee

') + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + n1 = stack.m_stack[0][0][1] + assert n1.label == ['1'] + assert len(n1.children) == 1 + + n1i = n1.children[0] + assert n1i.label == ['1', 'i'] + assert n1i.text == 'i. iii' + assert len(n1i.children) == 1 + + n1ia = n1i.children[0] + assert n1ia.label == ['1', 'i', 'A'] + assert len(n1ia.children) == 1 + + n1ia1 = n1ia.children[0] + assert n1ia1.label == ['1', 'i', 'A', '1'] + assert n1ia1.children == [] + + +def test_process_inner_child_space(): + with XMLBuilder('ROOT') as ctx: + ctx.HD("Title") + ctx.P("1. 111") + ctx.P("i. See country A. Not that country") + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + n1 = stack.m_stack[0][0][1] + assert n1.label == ['1'] + assert len(n1.children) == 1 + + n1i = n1.children[0] + assert n1i.label == ['1', 'i'] + assert n1i.children == [] + + +def test_process_inner_child_incorrect_xml(): + with XMLBuilder('ROOT') as ctx: + ctx.HD("Title") + ctx.child_from_string('

1. 111

') + ctx.P("i. iii") + ctx.child_from_string('

2. 222 Incorrect Content

') + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + assert len(stack.m_stack[0]) == 2 + + +def test_process_inner_child_no_marker(): + with XMLBuilder() as ctx: + ctx.HD("Title") + ctx.P("1. 111") + ctx.P("i. iii") + ctx.P("Howdy Howdy") + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + i1 = stack.m_stack[0][0][1] + assert len(i1.children) == 1 + i1i = i1.children[0] + assert i1i.children == [] + assert i1i.text == "i. iii\n\nHowdy Howdy" + + +def test_process_inner_child_has_citation(): + with XMLBuilder() as ctx: + ctx.HD("Title") + ctx.P("1. Something something see comment 22(a)-2.i. please") + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + tree = stack.m_stack[0][0][1] + assert tree.children == [] + + +def test_process_inner_child_stars_and_inline(): + with XMLBuilder() as ctx: + ctx.HD("Title") + ctx.STARS() + ctx.P("2. Content. * * *") + ctx.STARS() + ctx.P("xi. Content") + ctx.STARS() + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + tree = stack.m_stack[0][0][1] + assert tree.label == ['2'] + assert len(tree.children) == 1 + assert tree.children[0].label == ['2', 'xi'] + assert tree.children[0].children == [] + + +def test_process_inner_child_collapsed_i(): + with XMLBuilder() as ctx: + ctx.HD("Title") + ctx.child_from_string( + '

1. Keyterm text i. Content content

') + ctx.P("ii. Other stuff") + node = ctx.xml.xpath('//HD')[0] + stack = tree_utils.NodeStack() + gpo_cfr.process_inner_children(stack, node) + while stack.size() > 1: + stack.unwind() + tree = stack.m_stack[0][0][1] + assert tree.label == ['1'] + assert len(tree.children) == 2 + assert tree.children[0].label == ['1', 'i'] + assert tree.children[0].children == [] + assert tree.children[1].label == ['1', 'ii'] + assert tree.children[1].children == [] + + +@pytest.mark.parametrize('title', [ + "Some Title", + "Some Title", + "

Section 111.22

", + "

21(b) Contents.

", + "

31(r) Contents.

", + "

Section 111.31 Contents.

", + "

Paragraph 51(b)(1)(i).

", +]) +def test_is_title_success(title): + assert gpo_cfr.is_title(etree.fromstring(title)) + + +@pytest.mark.parametrize('title', [ + "Some Header", + "Some Image", + "

Then Section 22.111

", + "

Section 222.33 More text

", + "

Keyterm. More text

", +]) +def test_is_title_fail(title): + assert not gpo_cfr.is_title(etree.fromstring(title)) + + +def test_collapsed_markers_matches(): + assert ['i'] == [m.group(1) for m in gpo_cfr.collapsed_markers_matches( + '1. AAA - i. More', '1. AAA - i. More')] + assert ['1'] == [m.group(1) for m in gpo_cfr.collapsed_markers_matches( + 'A. AAA: 1. More', 'A. AAA: 1. More')] + for txt in ("1. Content - i.e. More content", + "1. Stuff in quotes like, “N.A.”", + "i. References appendix D, part I.A.1. Stuff" + "A. AAA - 1. More, without tags"): + assert gpo_cfr.collapsed_markers_matches(txt, txt) == [] diff --git a/tests/tree_interpretation_tests.py b/tests/interpparser/tree_tests.py similarity index 58% rename from tests/tree_interpretation_tests.py rename to tests/interpparser/tree_tests.py index fc3f1f1d..56727cb7 100644 --- a/tests/tree_interpretation_tests.py +++ b/tests/interpparser/tree_tests.py @@ -1,35 +1,31 @@ # -*- coding: utf-8 -*- +from interpparser import tree from regparser.citations import Label -from regparser.tree import interpretation def test_text_to_labels(): text = u"9(c)(2)(iii) Charges not Covered by § 1026.6(b)(1) and " text += "(b)(2)" - result = interpretation.text_to_labels( - text, Label(part='1111', comment=True)) + result = tree.text_to_labels(text, Label(part='1111', comment=True)) assert result == [['1111', '9', 'c', '2', 'iii', 'Interp']] text = "Paragraphs 4(b)(7) and (b)(8)." - result = interpretation.text_to_labels( - text, Label(part='1111', comment=True)) + result = tree.text_to_labels(text, Label(part='1111', comment=True)) assert result == [['1111', '4', 'b', '7', 'Interp'], ['1111', '4', 'b', '8', 'Interp']] text = "Appendices G and H-Something" - result = interpretation.text_to_labels( - text, Label(part='1111', comment=True)) + result = tree.text_to_labels(text, Label(part='1111', comment=True)) assert result == [['1111', 'G', 'Interp'], ['1111', 'H', 'Interp']] text = "Paragraph 38(l)(7)(i)(A)(2)." 
- result = interpretation.text_to_labels( - text, Label(part='1111', comment=True)) + result = tree.text_to_labels(text, Label(part='1111', comment=True)) assert result == [['1111', '38', 'l', '7', 'i', 'A', '2', 'Interp']] def test_merge_labels(): labels = [['1021', 'A'], ['1021', 'B']] - assert interpretation.merge_labels(labels) == ['1021', 'A_B'] + assert tree.merge_labels(labels) == ['1021', 'A_B'] labels = [['1021', 'A', '1'], ['1021', 'A', '2']] - assert interpretation.merge_labels(labels) == ['1021', 'A', '1_2'] + assert tree.merge_labels(labels) == ['1021', 'A', '1_2'] diff --git a/tests/notice/amendments/__init__.py b/tests/notice/amendments/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/notice/amendments/appendix_tests.py b/tests/notice/amendments/appendix_tests.py new file mode 100644 index 00000000..9e47f51c --- /dev/null +++ b/tests/notice/amendments/appendix_tests.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from mock import Mock + +from regparser.notice.amendments import appendix, fetch, section +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.xml_parser.preprocessors import preprocess_amdpars + + +def test_parse_appendix(monkeypatch): + process = Mock() + monkeypatch.setattr(appendix, 'process_appendix', process) + + with XMLBuilder("ROOT") as ctx: + ctx.AMDPAR("1. 
Adding Appendix R and S") + ctx.HD("Appendix R to Part 1234", SOURCE="HD1") + with ctx.EXTRACT(): + ctx.P("R1") + ctx.P("R2") + ctx.HD("Appendix S to Part 1234", SOURCE="HD1") + with ctx.EXTRACT(): + ctx.P("S1") + ctx.P("S2") + + appendix.parse_appendix(ctx.xml, '1234', 'S') + assert process.call_count == 1 + extract = process.call_args[0][0] + assert ['Appendix S to Part 1234', 'S1', 'S2'] == [n.text for n in extract] + + appendix.parse_appendix(ctx.xml, '1234', 'R') + assert process.call_count == 2 + extract = process.call_args[0][0] + assert ['Appendix R to Part 1234', 'R1', 'R2'] == [n.text for n in extract] + + +def test_process_amendments_context(monkeypatch): + """Context should carry over between REGTEXTs""" + # turn on the interpretations plugin + monkeypatch.setattr(fetch, 'ExtensionManager', Mock(return_value=[ + Mock(plugin=appendix.content_for_appendix), + Mock(plugin=section.content_for_regtext) + ])) + amdpar1 = "3. In § 106.1, revise paragraph (a) to read as follows:" + amdpar2 = "3. 
Add appendix C" + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(TITLE="12"): + ctx.AMDPAR(amdpar1) + with ctx.SECTION(): + ctx.SECTNO("§ 106.1") + ctx.SUBJECT("Some Subject.") + ctx.P("(a) Something new") + with ctx.REGTEXT(TITLE="12"): + ctx.AMDPAR(amdpar2) + ctx.HD("Appendix C to Part 106", SOURCE="HD1") + with ctx.EXTRACT(): + ctx.P("Text") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '106' + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '106' + assert ['106-1-a'] == [c[0] for c in amd1['changes']] + assert ['106-C', '106-C-p1'] == list(sorted(c[0] for c in amd2['changes'])) diff --git a/tests/notice/amendments/fetch_tests.py b/tests/notice/amendments/fetch_tests.py new file mode 100644 index 00000000..d4b52e35 --- /dev/null +++ b/tests/notice/amendments/fetch_tests.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +from regparser.notice import changes +from regparser.notice.amdparser import Amendment +from regparser.notice.amendments import fetch +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.struct import Node +from regparser.tree.xml_parser.preprocessors import preprocess_amdpars + + +def test_process_amendments_authority(): + amdpar = ('1. The authority citation for 27 CFR Part 555 continues to ' + 'read as follows:') + auth = '18 U.S.C. 847.' 
+ with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(TITLE="27", PART="555"): + ctx.AMDPAR(amdpar) + with ctx.AUTH(): + ctx.HD("Authority:", SOURCE="HED") + ctx.P(auth) + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '555' + assert amendment['authority'] == auth + assert 'changes' not in amendment + + +def test_create_xmlless_changes(): + labels_amended = [Amendment('DELETE', '200-?-2-a'), + Amendment('MOVE', '200-?-2-b', '200-?-2-c')] + notice_changes = changes.NoticeChanges() + for amendment in labels_amended: + fetch.create_xmlless_change(amendment, notice_changes) + + delete = notice_changes.changes_by_xml[None]['200-2-a'][0] + move = notice_changes.changes_by_xml[None]['200-2-b'][0] + assert delete == {'action': 'DELETE'} + assert move == {'action': 'MOVE', 'destination': ['200', '2', 'c']} + + +def test_create_xml_changes_reserve(): + labels_amended = [Amendment('RESERVE', '200-?-2-a')] + + n2a = Node('[Reserved]', label=['200', '2', 'a']) + n2 = Node('n2', label=['200', '2'], children=[n2a]) + root = Node('root', label=['200'], children=[n2]) + + notice_changes = changes.NoticeChanges() + fetch.create_xml_changes(labels_amended, root, notice_changes) + + reserve = notice_changes.changes_by_xml[None]['200-2-a'][0] + assert reserve['action'] == 'RESERVE' + assert reserve['node']['text'] == '[Reserved]' + + +def test_create_xml_changes_stars(): + labels_amended = [Amendment('PUT', '200-?-2-a')] + n2a1 = Node('(1) Content', label=['200', '2', 'a', '1']) + n2a2 = Node('(2) Content', label=['200', '2', 'a', '2']) + n2a = Node('(a) * * *', label=['200', '2', 'a'], children=[n2a1, n2a2]) + n2 = Node('n2', label=['200', '2'], children=[n2a]) + root = Node('root', label=['200'], children=[n2]) + + notice_changes = changes.NoticeChanges() + fetch.create_xml_changes(labels_amended, root, notice_changes) + data = notice_changes.changes_by_xml[None] + + for label 
in ('200-2-a-1', '200-2-a-2'): + assert label in data + assert len(data[label]) == 1 + change = data[label][0] + assert change['action'] == 'PUT' + assert 'field' not in change + + assert '200-2-a' in data + assert len(data['200-2-a']) == 1 + change = data['200-2-a'][0] + assert change['action'] == 'KEEP' + assert 'field' not in change + + +def test_create_xml_changes_stars_hole(): + labels_amended = [Amendment('PUT', '200-?-2-a')] + n2a1 = Node('(1) * * *', label=['200', '2', 'a', '1']) + n2a2 = Node('(2) a2a2a2', label=['200', '2', 'a', '2']) + n2a = Node('(a) aaa', label=['200', '2', 'a'], children=[n2a1, n2a2]) + n2 = Node('n2', label=['200', '2'], children=[n2a]) + root = Node('root', label=['200'], children=[n2]) + + notice_changes = changes.NoticeChanges() + fetch.create_xml_changes(labels_amended, root, notice_changes) + + data = notice_changes.changes_by_xml[None] + for label in ('200-2-a', '200-2-a-2'): + assert label in data + assert len(data[label]) == 1 + change = data[label][0] + assert change['action'] == 'PUT' + assert 'field' not in change + + assert '200-2-a-1' in data + assert len(data['200-2-a-1']) == 1 + change = data['200-2-a-1'][0] + assert change['action'] == 'KEEP' + assert 'field' not in change + + +def test_create_xml_changes_child_stars(): + labels_amended = [Amendment('PUT', '200-?-2-a')] + with XMLBuilder("ROOT") as ctx: + ctx.P("(a) Content") + ctx.STARS() + n2a = Node('(a) Content', label=['200', '2', 'a'], + source_xml=ctx.xml.xpath('//P')[0]) + n2b = Node('(b) Content', label=['200', '2', 'b']) + n2 = Node('n2', label=['200', '2'], children=[n2a, n2b]) + root = Node('root', label=['200'], children=[n2]) + + notice_changes = changes.NoticeChanges() + fetch.create_xml_changes(labels_amended, root, notice_changes) + data = notice_changes.changes_by_xml[None] + + assert '200-2-a' in data + assert len(data['200-2-a']) == 1 + change = data['200-2-a'][0] + assert change['action'] == 'PUT' + assert 'field' not in change + + n2a.text = 
n2a.text + ":" + n2a.source_xml.text = n2a.source_xml.text + ":" + + notice_changes = changes.NoticeChanges() + fetch.create_xml_changes(labels_amended, root, notice_changes) + data = notice_changes.changes_by_xml[None] + + assert '200-2-a' in data + assert len(data['200-2-a']) == 1 + change = data['200-2-a'][0] + assert change['action'] == 'PUT' + assert change.get('field') == '[text]' diff --git a/tests/notice/amendments/section_tests.py b/tests/notice/amendments/section_tests.py new file mode 100644 index 00000000..af848f30 --- /dev/null +++ b/tests/notice/amendments/section_tests.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import pytest +from mock import Mock + +from regparser.notice.amendments import fetch, section +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.xml_parser.preprocessors import preprocess_amdpars + + +def test_find_section(): + with XMLBuilder('REGTEXT') as ctx: + ctx.AMDPAR("In 200.1 revise paragraph (b) as follows:") + with ctx.SECTION(): + ctx.SECTNO("200.1") + ctx.SUBJECT("Authority and Purpose.") + ctx.P(" (b) This part is very important. ") + ctx.AMDPAR("In 200.3 revise paragraph (b)(1) as follows:") + with ctx.SECTION(): + ctx.SECTNO("200.3") + ctx.SUBJECT("Definitions") + ctx.P(" (b)(1) Define a term here. ") + + amdpar_xml = ctx.xml.xpath('//AMDPAR')[0] + sect = section.find_section(amdpar_xml) + assert sect.tag == 'SECTION' + + sectno_xml = sect.xpath('./SECTNO')[0] + assert sectno_xml.text == '200.1' + + +def test_find_section_paragraphs(): + with XMLBuilder("REGTEXT") as ctx: + with ctx.SECTION(): + ctx.SECTNO(" 205.4 ") + ctx.SUBJECT("[Corrected]") + ctx.AMDPAR("3. 
In § 105.1, revise paragraph (b) to read as follows:") + ctx.P("(b) paragraph 1") + + amdpar = ctx.xml.xpath('//AMDPAR')[0] + sect = section.find_section(amdpar) + assert sect is not None + paragraphs = [p for p in sect if p.tag == 'P'] + assert paragraphs[0].text == '(b) paragraph 1' + + +def test_find_lost_section(): + with XMLBuilder("PART") as ctx: + with ctx.REGTEXT(): + ctx.AMDPAR("3. In § 105.1, revise paragraph (b) to read as " + "follows:") + with ctx.REGTEXT(): + with ctx.SECTION(): + ctx.SECTNO(" 205.4 ") + ctx.SUBJECT("[Corrected]") + amdpar = ctx.xml.xpath('//AMDPAR')[0] + sect = section.find_lost_section(amdpar) + assert sect is not None + + +def test_find_section_lost(): + with XMLBuilder("PART") as ctx: + with ctx.REGTEXT(): + ctx.AMDPAR("3. In § 105.1, revise paragraph (b) to read as " + "follows:") + with ctx.REGTEXT(): + with ctx.SECTION(): + ctx.SECTNO(" 205.4 ") + ctx.SUBJECT("[Corrected]") + amdpar = ctx.xml.xpath('//AMDPAR')[0] + sect = section.find_section(amdpar) + assert sect is not None + + +@pytest.fixture +def content_plugin_installed(monkeypatch): + # turn on the section plugin + monkeypatch.setattr(fetch, 'ExtensionManager', Mock(return_value=[ + Mock(plugin=section.content_for_regtext) + ])) + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_introductory_text(): + """ Sometimes notices change just the introductory text of a paragraph + (instead of changing the entire paragraph tree). """ + with XMLBuilder("REGTEXT", PART="106", TITLE="12") as ctx: + ctx.AMDPAR("3. In § 106.2, revise the introductory text to read:") + with ctx.SECTION(): + ctx.SECTNO("§ 106.2") + ctx.SUBJECT(" Definitions ") + ctx.P(" Except as otherwise provided, the following apply. 
") + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + change = dict(amendment['changes'])['106-2'][0] + assert change.get('field') == '[text]' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_insert_in_order(): + amdpar = '[insert-in-order] [label:123-45-p6]' + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(TITLE="10"): + ctx.AMDPAR(amdpar) + with ctx.SECTION(): + ctx.SECTNO("§ 123.45") + ctx.SUBJECT("Some Subject.") + ctx.STARS() + ctx.P("This is the sixth paragraph") + ctx.STARS() + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + changes = dict(amendment['changes']) + + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '123' + assert ['123-45-p6'] == list(changes.keys()) + assert changes['123-45-p6'][0]['action'] == 'INSERT' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_mix_regs(): + """Some notices apply to multiple regs. For now, just ignore the + sections not associated with the reg we're focused on""" + amdpar1 = "3. In § 105.1, revise paragraph (a) to read as follows:" + amdpar2 = "3. 
In § 106.3, revise paragraph (b) to read as follows:" + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(PART="105", TITLE="12"): + ctx.AMDPAR(amdpar1) + with ctx.SECTION(): + ctx.SECTNO("§ 105.1") + ctx.SUBJECT("105Purpose.") + ctx.P("(a) 105Content") + with ctx.REGTEXT(PART="106", TITLE="12"): + ctx.AMDPAR(amdpar2) + with ctx.SECTION(): + ctx.SECTNO("§ 106.3") + ctx.SUBJECT("106Purpose.") + ctx.P("(b) Content") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '105' + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '106' + assert ['105-1-a'] == [c[0] for c in amd1['changes']] + assert ['106-3-b'] == [c[0] for c in amd2['changes']] + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_multiple_sections(): + """Regression test verifying multiple SECTIONs in the same REGTEXT""" + amdpar1 = "1. Modify § 111.22 by revising paragraph (b)" + amdpar2 = "2. Modify § 111.33 by revising paragraph (c)" + with XMLBuilder("REGTEXT", PART="111") as ctx: + ctx.AMDPAR(amdpar1) + with ctx.SECTION(): + ctx.SECTNO("§ 111.22") + ctx.SUBJECT("Subject Here.") + ctx.STARS() + ctx.P("(b) Revised second paragraph") + ctx.AMDPAR(amdpar2) + with ctx.SECTION(): + ctx.SECTNO("§ 111.33") + ctx.SUBJECT("Another Subject") + ctx.STARS() + ctx.P("(c) Revised third paragraph") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '111' + assert ['111-22-b'] == [c[0] for c in amd1['changes']] + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '111' + assert ['111-33-c'] == [c[0] for c in amd2['changes']] + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_markerless(): + amdpar = "1. 
Revise [label:105-11-p5] as blah" + with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: + ctx.AMDPAR(amdpar) + with ctx.SECTION(): + ctx.SECTNO("§ 105.11") + ctx.SUBJECT("Purpose.") + ctx.STARS() + ctx.P("Some text here") + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + changes = dict(amendment['changes']) + + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '105' + assert ['105-11-p5'] == list(changes.keys()) + changes = changes['105-11-p5'][0] + assert changes['action'] == 'PUT' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_no_nodes(): + amdpar = "1. In § 104.13, paragraph (b) is removed" + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(PART="104", TITLE="12"): + ctx.AMDPAR(amdpar) + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + changes = dict(amendment['changes']) + + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '104' + assert '104-13-b' in changes + assert changes['104-13-b'][0]['action'] == 'DELETE' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_multiple_in_same_parent(): + amdpar1 = "1. In § 105.1, revise paragraph (b) to read as follows:" + amdpar2 = "2. 
Also, revise paragraph (c):" + with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: + ctx.AMDPAR(amdpar1) + ctx.AMDPAR(amdpar2) + with ctx.SECTION(): + ctx.SECTNO("§ 105.1") + ctx.SUBJECT("Purpose.") + ctx.STARS() + ctx.P("(b) This part carries out.") + ctx.P("(c) More stuff") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '105' + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '105' + assert ['105-1-b'] == list(changes1.keys()) + assert ['105-1-c'] == list(changes2.keys()) + + changes = changes1['105-1-b'][0] + assert changes['action'] == 'PUT' + assert changes['node']['text'] == '(b) This part carries out.' + changes = changes2['105-1-c'][0] + assert changes['action'] == 'PUT' + assert changes['node']['text'] == '(c) More stuff' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_section(): + amdpar = "3. In § 105.1, revise paragraph (b) to read as follows:" + with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: + ctx.AMDPAR(amdpar) + with ctx.SECTION(): + ctx.SECTNO("§ 105.1") + ctx.SUBJECT("Purpose.") + ctx.STARS() + ctx.P("(b) This part carries out.") + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + changes = dict(amendment['changes']) + + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '105' + assert ['105-1-b'] == list(changes.keys()) + + changes = changes['105-1-b'][0] + assert changes['action'] == 'PUT' + assert changes['node']['text'] == '(b) This part carries out.' 
diff --git a/tests/notice/amendments/subpart_tests.py b/tests/notice/amendments/subpart_tests.py new file mode 100644 index 00000000..f4a3f955 --- /dev/null +++ b/tests/notice/amendments/subpart_tests.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +from __future__ import unicode_literals + +import pytest +from mock import Mock + +from regparser.notice.amdparser import Amendment +from regparser.notice.amendments import fetch, section, subpart +from regparser.test_utils.xml_builder import XMLBuilder +from regparser.tree.xml_parser.preprocessors import preprocess_amdpars + + +@pytest.fixture +def content_plugin_installed(monkeypatch): + # turn on the subpart plugin + monkeypatch.setattr(fetch, 'ExtensionManager', Mock(return_value=[ + Mock(plugin=subpart.content_for_new_subpart), + Mock(plugin=section.content_for_regtext) + ])) + + +def test_process_designate_subpart(): + amended_label = Amendment('MOVE_INTO_SUBPART', '200-?-1-a', + '205-Subpart:A') + + subpart_changes = subpart.process_designate_subpart(amended_label) + + assert ['200-1-a'] == list(subpart_changes.keys()) + change = subpart_changes['200-1-a'] + assert change['destination'] == ['205', 'Subpart', 'A'] + assert change['action'] == 'DESIGNATE' + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_multiple_changes(monkeypatch): + """ A notice can have two modifications to a paragraph. """ + amdpar1 = ("2. Designate §§ 106.1 through 106.3 as subpart A under " + "the heading.") + amdpar2 = "3. In § 106.2, revise the introductory text to read:" + with XMLBuilder("ROOT") as ctx: + with ctx.REGTEXT(PART="106", TITLE="12"): + ctx.AMDPAR(amdpar1) + with ctx.REGTEXT(PART="106", TITLE="12"): + ctx.AMDPAR(amdpar2) + with ctx.SECTION(): + ctx.SECTNO("§ 106.2") + ctx.SUBJECT(" Definitions ") + ctx.P(" Except as otherwise provided, the following " + "apply. 
") + preprocess_amdpars(ctx.xml) + + amd1, amd2 = fetch.fetch_amendments(ctx.xml) + changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) + assert amd1['instruction'] == amdpar1 + assert amd1['cfr_part'] == '106' + assert amd2['instruction'] == amdpar2 + assert amd2['cfr_part'] == '106' + assert len(changes1['106-2']) == 1 + assert len(changes2['106-2']) == 1 + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments_subpart(monkeypatch): + with XMLBuilder("RULE") as ctx: + with ctx.REGTEXT(PART="105", TITLE="12"): + ctx.AMDPAR("3. In § 105.1, revise paragraph (b) to read as" + "follows:") + with ctx.SECTION(): + ctx.SECTNO("§ 105.1") + ctx.SUBJECT("Purpose.") + ctx.STARS() + ctx.P("(b) This part carries out.") + with ctx.REGTEXT(PART="105", TITLE="12"): + ctx.AMDPAR("6. Add subpart B to read as follows:") + with ctx.CONTENTS(): + with ctx.SUBPART(): + ctx.SECHD("Sec.") + ctx.SECTNO("105.30") + ctx.SUBJECT("First In New Subpart.") + with ctx.SUBPART(): + ctx.HD("Subpart B—Requirements", SOURCE="HED") + with ctx.SECTION(): + ctx.SECTNO("105.30") + ctx.SUBJECT("First In New Subpart") + ctx.P("For purposes of this subpart, the follow " + "apply:") + ctx.P('(a) "Agent" means agent.') + + preprocess_amdpars(ctx.xml) + + subpart_amendment = fetch.fetch_amendments(ctx.xml)[1] + changes = dict(subpart_amendment['changes']) + + assert '105-Subpart-B' in changes + assert '105-30-a' in changes + assert '105-30' in changes + + +@pytest.mark.usefixtures('content_plugin_installed') +def test_process_amendments(): + amdpar = ("2. 
Designate §§ 105.1 through 105.3 as subpart A under the " + "heading.") + with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: + with ctx.SUBPART(): + ctx.HD("Subpart A—General", SOURCE="HED") + ctx.AMDPAR(amdpar) + preprocess_amdpars(ctx.xml) + + amendment = fetch.fetch_amendments(ctx.xml)[0] + changes = dict(amendment['changes']) + + assert amendment['instruction'] == amdpar + assert amendment['cfr_part'] == '105' + assert ['105-1', '105-2', '105-3'] == list(sorted(changes.keys())) + for change_list in changes.values(): + assert len(change_list) == 1 + change = change_list[0] + assert change['destination'] == ['105', 'Subpart', 'A'] + assert change['action'] == 'DESIGNATE' diff --git a/tests/notice_amendments_tests.py b/tests/notice_amendments_tests.py deleted file mode 100644 index 2045ad9d..00000000 --- a/tests/notice_amendments_tests.py +++ /dev/null @@ -1,635 +0,0 @@ -# -*- coding: utf-8 -*- -from unittest import TestCase - -import six -from mock import patch - -from regparser.notice import amendments, changes -from regparser.notice.amdparser import Amendment -from regparser.test_utils.xml_builder import XMLBuilder -from regparser.tree.struct import Node -from regparser.tree.xml_parser.preprocessors import preprocess_amdpars - - -class NoticeAmendmentsTest(TestCase): - @patch('regparser.notice.amendments.process_appendix') - def test_parse_appendix(self, process): - with XMLBuilder("ROOT") as ctx: - ctx.AMDPAR("1. 
Adding Appendix R and S") - ctx.HD("Appendix R to Part 1234", SOURCE="HD1") - with ctx.EXTRACT(): - ctx.P("R1") - ctx.P("R2") - ctx.HD("Appendix S to Part 1234", SOURCE="HD1") - with ctx.EXTRACT(): - ctx.P("S1") - ctx.P("S2") - - amendments.parse_appendix(ctx.xml, '1234', 'S') - self.assertEqual(process.call_count, 1) - extract = process.call_args[0][0] - self.assertEqual(['Appendix S to Part 1234', 'S1', 'S2'], - [n.text for n in extract]) - - amendments.parse_appendix(ctx.xml, '1234', 'R') - self.assertEqual(process.call_count, 2) - extract = process.call_args[0][0] - self.assertEqual(['Appendix R to Part 1234', 'R1', 'R2'], - [n.text for n in extract]) - - @patch('regparser.notice.amendments.interpretations') - def test_parse_interp(self, interpretations): - xmls = [] - with XMLBuilder("REGTEXT") as ctx: - with ctx.EXTRACT(): - ctx.P("Something") - ctx.STARS() - ctx.HD("Supplement I") - ctx.HD("A") - ctx.T1("a") - ctx.P("b") - xmls.append(ctx.xml) - - with XMLBuilder("REGTEXT") as ctx: - ctx.P("Something") - ctx.STARS() - with ctx.SUBSECT(): - ctx.HD("Supplement I") - ctx.HD("A") - ctx.T1("a") - ctx.P("b") - xmls.append(ctx.xml) - - with XMLBuilder("REGTEXT") as ctx: - ctx.AMDPAR("1. In Supplement I to part 111, under...") - ctx.P("Something") - ctx.STARS() - ctx.HD("SUPPLEMENT I") - ctx.HD("A") - ctx.T1("a") - ctx.P("b") - xmls.append(ctx.xml) - - with XMLBuilder("REGTEXT") as ctx: - ctx.AMDPAR("1. In Supplement I to part 111, under...") - ctx.P("Something") - ctx.STARS() - with ctx.APPENDIX(): - ctx.HD("SUPPLEMENT I") - ctx.HD("A") - ctx.T1("a") - ctx.P("b") - ctx.PRTPAGE() - xmls.append(ctx.xml) - - for xml in xmls: - amendments.parse_interp('111', xml) - root, nodes = interpretations.parse_from_xml.call_args[0] - self.assertEqual(root.label, ['111', 'Interp']) - self.assertEqual(['HD', 'T1', 'P'], [n.tag for n in nodes]) - - def test_parse_interp_subpart_confusion(self): - with XMLBuilder("REGTEXT") as ctx: - ctx.AMDPAR("1. 
In Supplement I to part 111, under Section 33, " - "paragraph 5 is added.") - ctx.HD("Supplement I") - with ctx.SUBPART(): - with ctx.SECTION(): - ctx.SECTNO(u"§ 111.33") - ctx.SUBJECT("Stubby Subby") - ctx.STARS() - ctx.P("5. Some Content") - interp = amendments.parse_interp('111', ctx.xml) - self.assertEqual(1, len(interp.children)) - c33 = interp.children[0] - self.assertEqual(c33.label, ['111', '33', 'Interp']) - self.assertEqual(1, len(c33.children)) - c335 = c33.children[0] - self.assertEqual(c335.label, ['111', '33', 'Interp', '5']) - - def test_find_section(self): - with XMLBuilder('REGTEXT') as ctx: - ctx.AMDPAR("In 200.1 revise paragraph (b) as follows:") - with ctx.SECTION(): - ctx.SECTNO("200.1") - ctx.SUBJECT("Authority and Purpose.") - ctx.P(" (b) This part is very important. ") - ctx.AMDPAR("In 200.3 revise paragraph (b)(1) as follows:") - with ctx.SECTION(): - ctx.SECTNO("200.3") - ctx.SUBJECT("Definitions") - ctx.P(" (b)(1) Define a term here. ") - - amdpar_xml = ctx.xml.xpath('//AMDPAR')[0] - section = amendments.find_section(amdpar_xml) - self.assertEqual(section.tag, 'SECTION') - - sectno_xml = section.xpath('./SECTNO')[0] - self.assertEqual(sectno_xml.text, '200.1') - - def test_find_section_paragraphs(self): - with XMLBuilder("REGTEXT") as ctx: - with ctx.SECTION(): - ctx.SECTNO(" 205.4 ") - ctx.SUBJECT("[Corrected]") - ctx.AMDPAR(u"3. In § 105.1, revise paragraph (b) to read as " - u"follows:") - ctx.P("(b) paragraph 1") - - amdpar = ctx.xml.xpath('//AMDPAR')[0] - section = amendments.find_section(amdpar) - self.assertNotEqual(None, section) - paragraphs = [p for p in section if p.tag == 'P'] - self.assertEqual(paragraphs[0].text, '(b) paragraph 1') - - def test_find_lost_section(self): - with XMLBuilder("PART") as ctx: - with ctx.REGTEXT(): - ctx.AMDPAR(u"3. 
In § 105.1, revise paragraph (b) to read as " - u"follows:") - with ctx.REGTEXT(): - with ctx.SECTION(): - ctx.SECTNO(" 205.4 ") - ctx.SUBJECT("[Corrected]") - amdpar = ctx.xml.xpath('//AMDPAR')[0] - section = amendments.find_lost_section(amdpar) - self.assertNotEqual(None, section) - - def test_find_section_lost(self): - with XMLBuilder("PART") as ctx: - with ctx.REGTEXT(): - ctx.AMDPAR(u"3. In § 105.1, revise paragraph (b) to read as " - u"follows:") - with ctx.REGTEXT(): - with ctx.SECTION(): - ctx.SECTNO(" 205.4 ") - ctx.SUBJECT("[Corrected]") - amdpar = ctx.xml.xpath('//AMDPAR')[0] - section = amendments.find_section(amdpar) - self.assertNotEqual(None, section) - - def test_process_designate_subpart(self): - amended_label = Amendment('MOVE_INTO_SUBPART', '200-?-1-a', - '205-Subpart:A') - - subpart_changes = amendments.process_designate_subpart(amended_label) - - six.assertCountEqual(self, ['200-1-a'], subpart_changes.keys()) - change = subpart_changes['200-1-a'] - self.assertEqual(change['destination'], ['205', 'Subpart', 'A']) - self.assertEqual(change['action'], 'DESIGNATE') - - def test_process_amendments(self): - amdpar = (u"2. Designate §§ 105.1 through 105.3 as subpart A under " - u"the heading.") - with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: - with ctx.SUBPART(): - ctx.HD(u"Subpart A—General", SOURCE="HED") - ctx.AMDPAR(amdpar) - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - changes = dict(amendment['changes']) - - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '105') - six.assertCountEqual(self, ['105-1', '105-2', '105-3'], changes.keys()) - for change_list in changes.values(): - self.assertEqual(1, len(change_list)) - change = change_list[0] - self.assertEqual(change['destination'], ['105', 'Subpart', 'A']) - self.assertEqual(change['action'], 'DESIGNATE') - - def test_process_amendments_section(self): - amdpar = u"3. 
In § 105.1, revise paragraph (b) to read as follows:" - with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: - ctx.AMDPAR(amdpar) - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.1") - ctx.SUBJECT("Purpose.") - ctx.STARS() - ctx.P("(b) This part carries out.") - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - changes = dict(amendment['changes']) - - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '105') - six.assertCountEqual(self, changes.keys(), ['105-1-b']) - - changes = changes['105-1-b'][0] - self.assertEqual(changes['action'], 'PUT') - self.assertTrue(changes['node']['text'].startswith( - u'(b) This part carries out.')) - - def test_process_amendments_multiple_in_same_parent(self): - amdpar1 = u"1. In § 105.1, revise paragraph (b) to read as follows:" - amdpar2 = "2. Also, revise paragraph (c):" - with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: - ctx.AMDPAR(amdpar1) - ctx.AMDPAR(amdpar2) - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.1") - ctx.SUBJECT("Purpose.") - ctx.STARS() - ctx.P("(b) This part carries out.") - ctx.P("(c) More stuff") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '105') - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '105') - six.assertCountEqual(self, changes1.keys(), ['105-1-b']) - six.assertCountEqual(self, changes2.keys(), ['105-1-c']) - - changes = changes1['105-1-b'][0] - self.assertEqual(changes['action'], 'PUT') - self.assertEqual(changes['node']['text'].strip(), - u'(b) This part carries out.') - changes = changes2['105-1-c'][0] - self.assertEqual(changes['action'], 'PUT') - self.assertTrue(changes['node']['text'].strip(), - u'(c) More stuff') - - def test_process_amendments_restart_new_section(self): - amdpar1 = 
"1. In Supplement I to Part 104, comment 22(a) is added" - amdpar2 = u"3. In § 105.1, revise paragraph (b) to read as follows:" - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(PART="104", TITLE="12"): - ctx.AMDPAR(amdpar1) - ctx.HD("SUPPLEMENT I", SOURCE='HED') - ctx.HD("22(a)", SOURCE='HD1') - ctx.P("1. Content") - with ctx.REGTEXT(PART="105", TITLE="12"): - ctx.AMDPAR(amdpar2) - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.1") - ctx.SUBJECT("Purpose.") - ctx.STARS() - ctx.P("(b) This part carries out.") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '104') - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '105') - self.assertIn('104-22-a-Interp', changes1) - self.assertIn('105-1-b', changes2) - - self.assertEqual(changes1['104-22-a-Interp'][0]['action'], 'POST') - self.assertEqual(changes2['105-1-b'][0]['action'], 'PUT') - - def test_process_amendments_no_nodes(self): - amdpar = u"1. In § 104.13, paragraph (b) is removed" - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(PART="104", TITLE="12"): - ctx.AMDPAR(amdpar) - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - changes = dict(amendment['changes']) - - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '104') - self.assertIn('104-13-b', changes) - self.assertEqual(changes['104-13-b'][0]['action'], 'DELETE') - - def test_process_amendments_markerless(self): - amdpar = u"1. 
Revise [label:105-11-p5] as blah" - with XMLBuilder("REGTEXT", PART="105", TITLE="12") as ctx: - ctx.AMDPAR(amdpar) - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.11") - ctx.SUBJECT("Purpose.") - ctx.STARS() - ctx.P("Some text here") - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - changes = dict(amendment['changes']) - - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '105') - six.assertCountEqual(self, changes.keys(), ['105-11-p5']) - changes = changes['105-11-p5'][0] - self.assertEqual(changes['action'], 'PUT') - - def test_process_amendments_multiple_sections(self): - """Regression test verifying multiple SECTIONs in the same REGTEXT""" - amdpar1 = u"1. Modify § 111.22 by revising paragraph (b)" - amdpar2 = u"2. Modify § 111.33 by revising paragraph (c)" - with XMLBuilder("REGTEXT", PART="111") as ctx: - ctx.AMDPAR(amdpar1) - with ctx.SECTION(): - ctx.SECTNO(u"§ 111.22") - ctx.SUBJECT("Subject Here.") - ctx.STARS() - ctx.P("(b) Revised second paragraph") - ctx.AMDPAR(amdpar2) - with ctx.SECTION(): - ctx.SECTNO(u"§ 111.33") - ctx.SUBJECT("Another Subject") - ctx.STARS() - ctx.P("(c) Revised third paragraph") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '111') - six.assertCountEqual(self, - [c[0] for c in amd1['changes']], ['111-22-b']) - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '111') - six.assertCountEqual(self, - [c[0] for c in amd2['changes']], ['111-33-c']) - - def test_process_amendments_subpart(self): - with XMLBuilder("RULE") as ctx: - with ctx.REGTEXT(PART="105", TITLE="12"): - ctx.AMDPAR(u"3. 
In § 105.1, revise paragraph (b) to read as" - u"follows:") - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.1") - ctx.SUBJECT("Purpose.") - ctx.STARS() - ctx.P("(b) This part carries out.") - with ctx.REGTEXT(PART="105", TITLE="12"): - ctx.AMDPAR("6. Add subpart B to read as follows:") - with ctx.CONTENTS(): - with ctx.SUBPART(): - ctx.SECHD("Sec.") - ctx.SECTNO("105.30") - ctx.SUBJECT("First In New Subpart.") - with ctx.SUBPART(): - ctx.HD(u"Subpart B—Requirements", SOURCE="HED") - with ctx.SECTION(): - ctx.SECTNO("105.30") - ctx.SUBJECT("First In New Subpart") - ctx.P("For purposes of this subpart, the follow " - "apply:") - ctx.P('(a) "Agent" means agent.') - - preprocess_amdpars(ctx.xml) - - subpart_amendment = amendments.fetch_amendments(ctx.xml)[1] - changes = dict(subpart_amendment['changes']) - - self.assertTrue('105-Subpart-B' in changes) - self.assertTrue('105-30-a' in changes) - self.assertTrue('105-30' in changes) - - def test_process_amendments_mix_regs(self): - """Some notices apply to multiple regs. For now, just ignore the - sections not associated with the reg we're focused on""" - amdpar1 = u"3. In § 105.1, revise paragraph (a) to read as follows:" - amdpar2 = u"3. 
In § 106.3, revise paragraph (b) to read as follows:" - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(PART="105", TITLE="12"): - ctx.AMDPAR(amdpar1) - with ctx.SECTION(): - ctx.SECTNO(u"§ 105.1") - ctx.SUBJECT("105Purpose.") - ctx.P("(a) 105Content") - with ctx.REGTEXT(PART="106", TITLE="12"): - ctx.AMDPAR(amdpar2) - with ctx.SECTION(): - ctx.SECTNO(u"§ 106.3") - ctx.SUBJECT("106Purpose.") - ctx.P("(b) Content") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '105') - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '106') - six.assertCountEqual(self, - [c[0] for c in amd1['changes']], ['105-1-a']) - six.assertCountEqual(self, - [c[0] for c in amd2['changes']], ['106-3-b']) - - def test_process_amendments_context(self): - """Context should carry over between REGTEXTs""" - amdpar1 = u"3. In § 106.1, revise paragraph (a) to read as follows:" - amdpar2 = "3. 
Add appendix C" - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(TITLE="12"): - ctx.AMDPAR(amdpar1) - with ctx.SECTION(): - ctx.SECTNO(u"§ 106.1") - ctx.SUBJECT("Some Subject.") - ctx.P("(a) Something new") - with ctx.REGTEXT(TITLE="12"): - ctx.AMDPAR(amdpar2) - ctx.HD("Appendix C to Part 106", SOURCE="HD1") - with ctx.EXTRACT(): - ctx.P("Text") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '106') - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '106') - six.assertCountEqual(self, - [c[0] for c in amd1['changes']], ['106-1-a']) - six.assertCountEqual( - self, - [c[0] for c in amd2['changes']], ['106-C', '106-C-p1']) - - def test_process_amendments_insert_in_order(self): - amdpar = '[insert-in-order] [label:123-45-p6]' - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(TITLE="10"): - ctx.AMDPAR(amdpar) - with ctx.SECTION(): - ctx.SECTNO(u"§ 123.45") - ctx.SUBJECT("Some Subject.") - ctx.STARS() - ctx.P("This is the sixth paragraph") - ctx.STARS() - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - changes = dict(amendment['changes']) - - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '123') - six.assertCountEqual(self, ['123-45-p6'], changes.keys()) - self.assertEqual('INSERT', changes['123-45-p6'][0]['action']) - - def test_process_amendments_authority(self): - amdpar = ('1. The authority citation for 27 CFR Part 555 continues ' - 'to read as follows:') - auth = '18 U.S.C. 847.' 
- with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(TITLE="27", PART="555"): - ctx.AMDPAR(amdpar) - with ctx.AUTH(): - ctx.HD("Authority:", SOURCE="HED") - ctx.P(auth) - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - self.assertEqual(amendment['instruction'], amdpar) - self.assertEqual(amendment['cfr_part'], '555') - self.assertEqual(amendment['authority'], auth) - self.assertNotIn('changes', amendment) - - def test_introductory_text(self): - """ Sometimes notices change just the introductory text of a paragraph - (instead of changing the entire paragraph tree). """ - with XMLBuilder("REGTEXT", PART="106", TITLE="12") as ctx: - ctx.AMDPAR(u"3. In § 106.2, revise the introductory text to read:") - with ctx.SECTION(): - ctx.SECTNO(u"§ 106.2") - ctx.SUBJECT(" Definitions ") - ctx.P(" Except as otherwise provided, the following apply. ") - preprocess_amdpars(ctx.xml) - - amendment = amendments.fetch_amendments(ctx.xml)[0] - change = dict(amendment['changes'])['106-2'][0] - self.assertEqual('[text]', change.get('field')) - - def test_multiple_changes(self): - """ A notice can have two modifications to a paragraph. """ - amdpar1 = (u"2. Designate §§ 106.1 through 106.3 as subpart A under " - u"the heading.") - amdpar2 = u"3. In § 106.2, revise the introductory text to read:" - with XMLBuilder("ROOT") as ctx: - with ctx.REGTEXT(PART="106", TITLE="12"): - ctx.AMDPAR(amdpar1) - with ctx.REGTEXT(PART="106", TITLE="12"): - ctx.AMDPAR(amdpar2) - with ctx.SECTION(): - ctx.SECTNO(u"§ 106.2") - ctx.SUBJECT(" Definitions ") - ctx.P(" Except as otherwise provided, the following " - "apply. 
") - preprocess_amdpars(ctx.xml) - - amd1, amd2 = amendments.fetch_amendments(ctx.xml) - changes1, changes2 = dict(amd1['changes']), dict(amd2['changes']) - self.assertEqual(amd1['instruction'], amdpar1) - self.assertEqual(amd1['cfr_part'], '106') - self.assertEqual(amd2['instruction'], amdpar2) - self.assertEqual(amd2['cfr_part'], '106') - self.assertEqual(1, len(changes1['106-2'])) - self.assertEqual(1, len(changes2['106-2'])) - - def test_create_xmlless_changes(self): - labels_amended = [Amendment('DELETE', '200-?-2-a'), - Amendment('MOVE', '200-?-2-b', '200-?-2-c')] - notice_changes = changes.NoticeChanges() - for amendment in labels_amended: - amendments.create_xmlless_change(amendment, notice_changes) - - delete = notice_changes.changes_by_xml[None]['200-2-a'][0] - move = notice_changes.changes_by_xml[None]['200-2-b'][0] - self.assertEqual({'action': 'DELETE'}, delete) - self.assertEqual({'action': 'MOVE', 'destination': ['200', '2', 'c']}, - move) - - def test_create_xml_changes_reserve(self): - labels_amended = [Amendment('RESERVE', '200-?-2-a')] - - n2a = Node('[Reserved]', label=['200', '2', 'a']) - n2 = Node('n2', label=['200', '2'], children=[n2a]) - root = Node('root', label=['200'], children=[n2]) - - notice_changes = changes.NoticeChanges() - amendments.create_xml_changes(labels_amended, root, notice_changes) - - reserve = notice_changes.changes_by_xml[None]['200-2-a'][0] - self.assertEqual(reserve['action'], 'RESERVE') - self.assertEqual(reserve['node']['text'], u'[Reserved]') - - def test_create_xml_changes_stars(self): - labels_amended = [Amendment('PUT', '200-?-2-a')] - n2a1 = Node('(1) Content', label=['200', '2', 'a', '1']) - n2a2 = Node('(2) Content', label=['200', '2', 'a', '2']) - n2a = Node('(a) * * *', label=['200', '2', 'a'], children=[n2a1, n2a2]) - n2 = Node('n2', label=['200', '2'], children=[n2a]) - root = Node('root', label=['200'], children=[n2]) - - notice_changes = changes.NoticeChanges() - 
amendments.create_xml_changes(labels_amended, root, notice_changes) - data = notice_changes.changes_by_xml[None] - - for label in ('200-2-a-1', '200-2-a-2'): - self.assertIn(label, data) - self.assertEqual(1, len(data[label])) - change = data[label][0] - self.assertEqual('PUT', change['action']) - self.assertNotIn('field', change) - - self.assertTrue('200-2-a' in data) - self.assertEqual(1, len(data['200-2-a'])) - change = data['200-2-a'][0] - self.assertEqual('KEEP', change['action']) - self.assertNotIn('field', change) - - def test_create_xml_changes_stars_hole(self): - labels_amended = [Amendment('PUT', '200-?-2-a')] - n2a1 = Node('(1) * * *', label=['200', '2', 'a', '1']) - n2a2 = Node('(2) a2a2a2', label=['200', '2', 'a', '2']) - n2a = Node('(a) aaa', label=['200', '2', 'a'], children=[n2a1, n2a2]) - n2 = Node('n2', label=['200', '2'], children=[n2a]) - root = Node('root', label=['200'], children=[n2]) - - notice_changes = changes.NoticeChanges() - amendments.create_xml_changes(labels_amended, root, notice_changes) - - data = notice_changes.changes_by_xml[None] - for label in ('200-2-a', '200-2-a-2'): - self.assertIn(label, data) - self.assertEqual(1, len(data[label])) - change = data[label][0] - self.assertEqual('PUT', change['action']) - self.assertNotIn('field', change) - - self.assertIn('200-2-a-1', data) - self.assertEqual(1, len(data['200-2-a-1'])) - change = data['200-2-a-1'][0] - self.assertEqual('KEEP', change['action']) - self.assertFalse('field' in change) - - def test_create_xml_changes_child_stars(self): - labels_amended = [Amendment('PUT', '200-?-2-a')] - with XMLBuilder("ROOT") as ctx: - ctx.P("(a) Content") - ctx.STARS() - n2a = Node('(a) Content', label=['200', '2', 'a'], - source_xml=ctx.xml.xpath('//P')[0]) - n2b = Node('(b) Content', label=['200', '2', 'b']) - n2 = Node('n2', label=['200', '2'], children=[n2a, n2b]) - root = Node('root', label=['200'], children=[n2]) - - notice_changes = changes.NoticeChanges() - 
amendments.create_xml_changes(labels_amended, root, notice_changes) - data = notice_changes.changes_by_xml[None] - - self.assertIn('200-2-a', data) - self.assertTrue(1, len(data['200-2-a'])) - change = data['200-2-a'][0] - self.assertEqual('PUT', change['action']) - self.assertNotIn('field', change) - - n2a.text = n2a.text + ":" - n2a.source_xml.text = n2a.source_xml.text + ":" - - notice_changes = changes.NoticeChanges() - amendments.create_xml_changes(labels_amended, root, notice_changes) - data = notice_changes.changes_by_xml[None] - - self.assertIn('200-2-a', data) - self.assertTrue(1, len(data['200-2-a'])) - change = data['200-2-a'][0] - self.assertEqual('PUT', change['action']) - self.assertEqual('[text]', change.get('field')) diff --git a/tests/tree/gpo_cfr/interpretations_tests.py b/tests/tree/gpo_cfr/interpretations_tests.py deleted file mode 100644 index 2f52b426..00000000 --- a/tests/tree/gpo_cfr/interpretations_tests.py +++ /dev/null @@ -1,439 +0,0 @@ -# -*- coding: utf-8 -*- -from unittest import TestCase - -from lxml import etree - -from regparser.tree.gpo_cfr import interpretations -from regparser.tree.xml_parser import tree_utils - - -class InterpretationsTest(TestCase): - def test_interpretation_markers(self): - text = '1. Kiwis and Mangos' - marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker, '1') - - def test_interpretation_markers_roman(self): - text = 'iv. Kiwis and Mangos' - marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker, 'iv') - - def test_interpretation_markers_emph(self): - text = '1. Kiwis and Mangos' - marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker, '1') - - text = '1. Kiwis and Mangos. More content.' 
- marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker, '1') - - def test_interpretation_markers_none(self): - text = '(iv) Kiwis and Mangos' - marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker, None) - - def test_interpretation_markers_stars_no_period(self): - for marker in ('4 ', 'iv ', 'A\t'): - text = marker + '* * *' - found_marker = interpretations.get_first_interp_marker(text) - self.assertEqual(marker.strip(), found_marker) - - text = "33 * * * Some more stuff" - found_marker = interpretations.get_first_interp_marker(text) - self.assertEqual(None, found_marker) - - def test_build_supplement_tree(self): - """Integration test""" - xml = """ - - Supplement I to Part 737-Official Interpretations - Section 737.5 NASCAR -

1. Paragraph 1

-

i. Paragraph i; A. Start of A

- 5(a) Access Device -

1. Paragraph 111

-

i. Content content

-

ii. More content

-

A. Aaaaah

-

1. More info

-

2. Second info

-

3. Keyterms

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(1, len(tree.children)) - - i5 = tree.children[0] - self.assertEqual(['737', '5', 'Interp'], i5.label) - self.assertEqual(2, len(i5.children)) - - i51, i5a = i5.children - self.assertEqual(['737', '5', 'Interp', '1'], i51.label) - self.assertEqual(1, len(i51.children)) - i51i = i51.children[0] - self.assertEqual(['737', '5', 'Interp', '1', 'i'], i51i.label) - self.assertEqual(1, len(i51i.children)) - i51ia = i51i.children[0] - self.assertEqual(['737', '5', 'Interp', '1', 'i', 'A'], i51ia.label) - self.assertEqual(0, len(i51ia.children)) - - self.assertEqual(['737', '5', 'a', 'Interp'], i5a.label) - self.assertEqual(1, len(i5a.children)) - i5a1 = i5a.children[0] - self.assertEqual(['737', '5', 'a', 'Interp', '1'], i5a1.label) - self.assertEqual(2, len(i5a1.children)) - i5a1i, i5a1ii = i5a1.children - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'i'], i5a1i.label) - self.assertEqual(0, len(i5a1i.children)) - - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii'], i5a1ii.label) - self.assertEqual(1, len(i5a1ii.children)) - i5a1iia = i5a1ii.children[0] - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii', 'A'], - i5a1iia.label) - self.assertEqual(3, len(i5a1iia.children)) - i5a1iia1, i5a1iia2, i5a1iia3 = i5a1iia.children - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii', 'A', '1'], - i5a1iia1.label) - self.assertEqual(i5a1iia1.tagged_text, '1. More info') - self.assertEqual(0, len(i5a1iia1.children)) - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii', 'A', '2'], - i5a1iia2.label) - self.assertEqual(i5a1iia2.tagged_text, '2. Second info') - self.assertEqual(0, len(i5a1iia2.children)) - self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii', 'A', '3'], - i5a1iia3.label) - self.assertEqual(i5a1iia3.tagged_text, '3. 
Keyterms') - self.assertEqual(0, len(i5a1iia3.children)) - - def test_build_supplement_tree_spacing(self): - """Integration test""" - xml = """ - - Supplement I to Part 737-Official Interpretations - Section 737.5 NASCAR -

1.Phrase. More Content

-

i. I likeice cream

-

A. Aaaaah

-

1.More info

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(1, len(tree.children)) - - s5 = tree.children[0] - self.assertEqual(1, len(s5.children)) - - s51 = s5.children[0] - self.assertEqual(s51.text.strip(), "1. Phrase. More Content") - self.assertEqual(1, len(s51.children)) - - s51i = s51.children[0] - self.assertEqual(s51i.text.strip(), "i. I like ice cream") - self.assertEqual(1, len(s51i.children)) - - s51ia = s51i.children[0] - self.assertEqual(s51ia.text.strip(), "A. Aaaaah") - self.assertEqual(1, len(s51ia.children)) - - s51ia1 = s51ia.children[0] - self.assertEqual(s51ia1.text.strip(), "1. More info") - self.assertEqual(0, len(s51ia1.children)) - - def test_build_supplement_tree_repeats(self): - """Integration test""" - xml = """ - - Supplement I to Part 737-Official Interpretations - Appendices G and H-Content -

1. G:H

- Appendix G -

1. G

- Appendix H -

1. H

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(3, len(tree.children)) - aGH, aG, aH = tree.children - - self.assertEqual(['737', 'G_H', 'Interp'], aGH.label) - self.assertEqual(['737', 'G', 'Interp'], aG.label) - self.assertEqual(['737', 'H', 'Interp'], aH.label) - - def test_build_supplement_tree_skip_levels(self): - xml = """ - - Supplement I to Part 737-Official Interpretations - Section 737.5 NASCAR - 5(a)(1)(i) Access Device -

1. Paragraph 111

- 5(b) Other Devices -

1. Paragraph 222

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(1, len(tree.children)) - - i5 = tree.children[0] - self.assertEqual(['737', '5', 'Interp'], i5.label) - self.assertEqual(2, len(i5.children)) - i5a, i5b = i5.children - - self.assertEqual(['737', '5', 'a', 'Interp'], i5a.label) - self.assertEqual(1, len(i5a.children)) - i5a1 = i5a.children[0] - - self.assertEqual(['737', '5', 'a', '1', 'Interp'], i5a1.label) - self.assertEqual(1, len(i5a1.children)) - i5a1i = i5a1.children[0] - - self.assertEqual(['737', '5', 'a', '1', 'i', 'Interp'], i5a1i.label) - self.assertEqual(1, len(i5a1i.children)) - - self.assertEqual(['737', '5', 'b', 'Interp'], i5b.label) - self.assertEqual(1, len(i5b.children)) - - def test_build_supplement_tree_appendix_paragraphs(self): - xml = """ - - Supplement I to Part 737-Official Interpretations - Appendix H - (b) bbbbbbb -

1. Paragraph b

- (b)(5) b5b5b5 -

1. Paragraph b5

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(1, len(tree.children)) - - ih = tree.children[0] - self.assertEqual(['737', 'H', 'Interp'], ih.label) - self.assertEqual(1, len(ih.children)) - - ihb = ih.children[0] - self.assertEqual(['737', 'H', 'b', 'Interp'], ihb.label) - self.assertEqual(2, len(ihb.children)) - - ihb1, ihb5 = ihb.children - self.assertEqual(['737', 'H', 'b', 'Interp', '1'], ihb1.label) - self.assertEqual(['737', 'H', 'b', '5', 'Interp'], ihb5.label) - - def test_build_supplement_intro_section(self): - """Integration test""" - xml = """ - - Supplement I to Part 737-Official Interpretations - Introduction -

1. Some content. (a) Badly named

-

(b) Badly named

- Subpart A - Section 737.13 -

13(a) Some Stuff!

-

1. 131313

- Appendix G -

1. G

-
""" - tree = interpretations.build_supplement_tree('737', - etree.fromstring(xml)) - self.assertEqual(['737', 'Interp'], tree.label) - self.assertEqual(3, len(tree.children)) - h1, s13, g = tree.children - - self.assertEqual(['737', 'Interp', 'h1'], h1.label) - self.assertEqual(['737', '13', 'Interp'], s13.label) - self.assertEqual(['737', 'G', 'Interp'], g.label) - - self.assertEqual(len(h1.children), 1) - self.assertEqual('1. Some content. (a) Badly named\n\n' + - '(b) Badly named', h1.children[0].text.strip()) - self.assertEqual(len(h1.children[0].children), 0) - - self.assertEqual(1, len(s13.children)) - self.assertEqual('13(a) Some Stuff!', s13.children[0].title) - - def test_process_inner_child(self): - xml = """ - - Title -

1. 111. i. iii

- -

A. AAA

-

1. eee

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - n1 = stack.m_stack[0][0][1] - self.assertEqual(['1'], n1.label) - self.assertEqual(1, len(n1.children)) - - n1i = n1.children[0] - self.assertEqual(['1', 'i'], n1i.label) - self.assertEqual(n1i.text.strip(), 'i. iii') - self.assertEqual(1, len(n1i.children)) - - n1ia = n1i.children[0] - self.assertEqual(['1', 'i', 'A'], n1ia.label) - self.assertEqual(1, len(n1ia.children)) - - n1ia1 = n1ia.children[0] - self.assertEqual(['1', 'i', 'A', '1'], n1ia1.label) - self.assertEqual(0, len(n1ia1.children)) - - def test_process_inner_child_space(self): - xml = """ - - Title -

1. 111

-

i. See country A. Not that country

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - n1 = stack.m_stack[0][0][1] - self.assertEqual(['1'], n1.label) - self.assertEqual(1, len(n1.children)) - - n1i = n1.children[0] - self.assertEqual(['1', 'i'], n1i.label) - self.assertEqual(0, len(n1i.children)) - - def test_process_inner_child_incorrect_xml(self): - xml = """ - - Title -

1. 111

-

i. iii

-

2. 222 Incorrect Content

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - self.assertEqual(2, len(stack.m_stack[0])) - - def test_process_inner_child_no_marker(self): - xml = """ - - Title -

1. 111

-

i. iii

-

Howdy Howdy

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - i1 = stack.m_stack[0][0][1] - self.assertEqual(1, len(i1.children)) - i1i = i1.children[0] - self.assertEqual(0, len(i1i.children)) - self.assertEqual(i1i.text.strip(), "i. iii\n\nHowdy Howdy") - - def test_process_inner_child_has_citation(self): - xml = """ - - Title -

1. Something something see comment 22(a)-2.i. please

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - tree = stack.m_stack[0][0][1] - self.assertEqual(0, len(tree.children)) - - def test_process_inner_child_stars_and_inline(self): - xml = """ - - Title - -

2. Content. * * *

- -

xi. Content

- -
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - tree = stack.m_stack[0][0][1] - self.assertEqual(['2'], tree.label) - self.assertEqual(1, len(tree.children)) - self.assertEqual(['2', 'xi'], tree.children[0].label) - self.assertEqual(0, len(tree.children[0].children)) - - def test_process_inner_child_collapsed_i(self): - xml = """ - - Title -

1. Keyterm text i. Content content

-

ii. Other stuff

-
""" - node = etree.fromstring(xml).xpath('//HD')[0] - stack = tree_utils.NodeStack() - interpretations.process_inner_children(stack, node) - while stack.size() > 1: - stack.unwind() - tree = stack.m_stack[0][0][1] - self.assertEqual(['1'], tree.label) - self.assertEqual(2, len(tree.children)) - self.assertEqual(['1', 'i'], tree.children[0].label) - self.assertEqual(0, len(tree.children[0].children)) - self.assertEqual(['1', 'ii'], tree.children[1].label) - self.assertEqual(0, len(tree.children[1].children)) - - def test_is_title(self): - titles = [ - "Some Title", - "Some Title", - "

Section 111.22

", - "

21(b) Contents.

", - "

31(r) Contents.

", - "

Section 111.31 Contents.

", - "

Paragraph 51(b)(1)(i).

", - ] - for title in titles: - self.assertTrue(interpretations.is_title(etree.fromstring(title))) - - non_titles = [ - "Some Header", - "Some Image", - "

Then Section 22.111

", - "

Section 222.33 More text

", - "

Keyterm. More text

", - ] - for non_title in non_titles: - self.assertFalse( - interpretations.is_title(etree.fromstring(non_title))) - - def test_collapsed_markers_matches(self): - self.assertEqual( - ['i'], - [m.group(1) for m in interpretations.collapsed_markers_matches( - '1. AAA - i. More', '1. AAA - i. More')]) - self.assertEqual( - ['1'], - [m.group(1) for m in interpretations.collapsed_markers_matches( - 'A. AAA: 1. More', 'A. AAA: 1. More')]) - for txt in ("1. Content - i.e. More content", - u"1. Stuff in quotes like, “N.A.”", - u"i. References appendix D, part I.A.1. Stuff" - "A. AAA - 1. More, without tags"): - self.assertEqual([], interpretations.collapsed_markers_matches( - txt, txt))