
Several Pylint suggestions #332

Merged: 12 commits, Nov 18, 2016
13 changes: 6 additions & 7 deletions regparser/citations.py
@@ -28,9 +28,8 @@ def from_node(cls, node):
         determine which schema to follow. Node labels aren't as expressive as
         Label objects"""
         if (node.node_type == Node.APPENDIX or
-                (node.node_type == Node.INTERP and
-                 len(node.label) > 2 and
-                 node.label[1].isalpha())):
+                (node.node_type == Node.INTERP and len(node.label) > 2 and
+                 node.label[1].isalpha())):
             if len(node.label) > 2 and node.label[2].isdigit():
                 schema = cls.app_sect_schema
             else:
@@ -89,7 +88,7 @@ def copy(self, schema=None, **kwargs):
         new_settings = {}

         found_start = False
-        for field in (schema + Label.comment_schema):
+        for field in schema + Label.comment_schema:
             if field in kwargs:
                 found_start = True
                 new_settings[field] = kwargs[field]
@@ -121,7 +120,7 @@ def __repr__(self):

     def __eq__(self, other):
         """Equality if types match and fields match"""
-        return (type(other) == type(self) and
+        return (isinstance(other, Label) and
                 self.using_default_schema == other.using_default_schema and
                 self.settings == other.settings and
                 self.schema == other.schema and
@@ -270,8 +269,8 @@ def multiple(gram, comment):
            full_start = start
            if match.marker is not '':
                start = match.marker.pos[1]
-           label = filter(lambda l: l != '.', list(match)[3:])
-           label = dict(zip(['p1', 'p2', 'p3'], label))
+           label_parts = filter(lambda l: l != '.', list(match)[3:])
+           label = dict(zip(['p1', 'p2', 'p3'], label_parts))
            citations.append(ParagraphCitation(
                start, end, initial_label.copy(
                    appendix=match.appendix, appendix_section=match.a1,
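A note on the `__eq__` change above: pylint flags `type(other) == type(self)` as unidiomatic-typecheck (C0123). The `isinstance` replacement is the conventional fix, but it is slightly looser, since subclasses of `Label` now pass the type test where an exact-type comparison would have rejected them. A minimal sketch of the difference (class names here are illustrative):

```python
class Label(object):
    pass


class SubLabel(Label):
    pass


sub = SubLabel()
print(type(sub) == type(Label()))  # False: exact-type check rejects subclasses
print(isinstance(sub, Label))      # True: isinstance accepts them
```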
8 changes: 4 additions & 4 deletions regparser/commands/annual_editions.py
@@ -23,29 +23,29 @@ def last_versions(cfr_title, cfr_part):
         version = subpath.read()
         pub_date = annual.date_of_annual_after(cfr_title, version.effective)
         have_annual_edition[pub_date.year] = version.identifier
-    for year in sorted(have_annual_edition.keys()):
+    for year in sorted(have_annual_edition):
         if annual.find_volume(year, cfr_title, cfr_part):
             yield LastVersionInYear(have_annual_edition[year], year)
         else:
             logger.warning("%s edition for %s CFR %s not published yet",
                            year, cfr_title, cfr_part)


-def process_if_needed(cfr_title, cfr_part, last_versions):
+def process_if_needed(cfr_title, cfr_part, last_version_list):
     """Calculate dependencies between input and output files for these annual
     editions. If an output is missing or out of date, process it"""
     annual_path = entry.Annual(cfr_title, cfr_part)
     tree_path = entry.Tree(cfr_title, cfr_part)
     version_path = entry.Version(cfr_title, cfr_part)
     deps = dependency.Graph()

-    for last_version in last_versions:
+    for last_version in last_version_list:
         deps.add(tree_path / last_version.version_id,
                  version_path / last_version.version_id)
         deps.add(tree_path / last_version.version_id,
                  annual_path / last_version.year)

-    for last_version in last_versions:
+    for last_version in last_version_list:
         tree_entry = tree_path / last_version.version_id
         deps.validate_for(tree_entry)
         if deps.is_stale(tree_entry):
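The `last_versions` rename fixes pylint's redefined-outer-name (W0621): the parameter shadowed the `last_versions` function defined just above it in the same module. A hypothetical minimal reproduction of the warning:

```python
def last_versions():
    """Imagine this yields LastVersionInYear records."""
    return ['2015-1', '2016-1']


def process_if_needed(last_versions):  # W0621: shadows the function above
    return len(last_versions)


def process_if_needed_fixed(last_version_list):  # renamed: no shadowing
    return len(last_version_list)
```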
10 changes: 5 additions & 5 deletions regparser/commands/citations.py
@@ -14,14 +14,14 @@ def citations(input_files, unique):
     """Find all CFR citations in a file (or stdin)"""
     if not input_files:
         input_files = [codecs.getreader('utf8')(sys.stdin)]
-    for f in input_files:
-        text = f.read()
-        citations = cfr_citations(text, include_fill=True)
+    for file_ in input_files:
+        text = file_.read()
+        cits = cfr_citations(text, include_fill=True)
         if unique:
-            labels = {citation.label for citation in citations}
+            labels = {citation.label for citation in cits}
             for label in sorted(labels):
                 click.echo(label)
         else:
-            for citation in sorted(citations, key=lambda c: c.start):
+            for citation in sorted(cits, key=lambda c: c.start):
                 click.echo(u"{}: {}\n".format(
                     text[citation.start:citation.end], citation.label))
2 changes: 1 addition & 1 deletion regparser/commands/compare_to.py
@@ -39,7 +39,7 @@ def compare(local_path, remote_url, prompt=True):
     diff"""
     remote = path_to_json(remote_url)
     if remote is None:
-        logger.warn("Nonexistent: %s", remote_url)
+        logger.warning("Nonexistent: %s", remote_url)
         return None

     with open(local_path) as fp:
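`Logger.warn` is an undocumented alias of `Logger.warning`, deprecated since Python 3.3, so this is a correctness fix as much as a style one. For illustration (the URL is a placeholder):

```python
import logging

logger = logging.getLogger(__name__)
# Preferred spelling; logger.warn(...) still works but emits a
# DeprecationWarning on modern Pythons.
logger.warning("Nonexistent: %s", "https://example.com/missing.json")
```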
3 changes: 2 additions & 1 deletion regparser/commands/diffs.py
@@ -1,6 +1,7 @@
-import click
 import logging
+
+import click

 from regparser.diff.tree import changes_between
 from regparser.index import dependency, entry

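This reshuffle, repeated in several files below, satisfies pylint's wrong-import-order (C0411): standard-library imports come first, then a blank line, then third-party packages, then first-party modules. As a sketch of the convention:

```python
import logging  # 1. standard library

import click    # 2. third-party packages

from regparser.index import dependency, entry  # 3. first-party code
```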
6 changes: 3 additions & 3 deletions regparser/commands/fetch_annual_edition.py
@@ -20,9 +20,9 @@ def fetch_annual_edition(cfr_title, cfr_part, year):
 class AnnualEditionResolver(DependencyResolver):
     PATH_PARTS = (
         entry.Annual.PREFIX,
-        '(?P<cfr_title>\d+)',
-        '(?P<cfr_part>\d+)',
-        '(?P<year>\d{4})')
+        r'(?P<cfr_title>\d+)',
+        r'(?P<cfr_part>\d+)',
+        r'(?P<year>\d{4})')

     def resolution(self):
         args = [self.match.group('cfr_title'), self.match.group('cfr_part'),
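The `r` prefixes address anomalous-backslash-in-string (W1401): `'\d'` is not a recognized string escape, so Python happens to pass it through unchanged, but newer Pythons warn about the unprefixed form, and a raw string states plainly that the backslash belongs to the regex engine. A quick check:

```python
import re

# r'\d' hands the backslash straight to the regex engine; '\d' merely
# relies on Python leaving unknown escapes alone.
match = re.match(r'(?P<year>\d{4})', '2016')
assert match.group('year') == '2016'
```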
27 changes: 18 additions & 9 deletions regparser/commands/layers.py
@@ -1,22 +1,29 @@
-import click
 import logging
+
+import click

 from regparser.commands import utils
 from regparser.index import dependency, entry
 from regparser.plugins import classes_by_shorthand
 import settings


-LAYER_CLASSES = {
-    doc_type: classes_by_shorthand(class_string_list)
-    for doc_type, class_string_list in settings.LAYERS.items()}
-# Also add in the "ALL" layers
-for doc_type in LAYER_CLASSES:
-    for layer_name, cls in LAYER_CLASSES['ALL'].items():
-        LAYER_CLASSES[doc_type][layer_name] = cls
 logger = logging.getLogger(__name__)


+def _init_classes():
+    """Avoid leaking state variables by wrapping `LAYER_CLASSES` construction
+    in a function"""
+    classes = {doc_type: classes_by_shorthand(class_string_list)
+               for doc_type, class_string_list in settings.LAYERS.items()}
+    # Also add in the "ALL" layers
+    for doc_type in classes:
+        for layer_name, cls in classes['ALL'].items():
+            classes[doc_type][layer_name] = cls
+    return classes
+LAYER_CLASSES = _init_classes()
+
+
 def stale_layers(doc_entry, doc_type):
     """Return the name of layer dependencies which are now stale. Limit to a
     particular doc_type"""
@@ -29,11 +36,13 @@ def stale_layers(doc_entry, doc_type):
     # Meta layer also depends on the version info
     deps.add(layer_dir / 'meta', entry.Version(*doc_entry.path))

+    stale = []
     for layer_name in LAYER_CLASSES[doc_type]:
         layer_entry = layer_dir / layer_name
         deps.validate_for(layer_entry)
         if deps.is_stale(layer_entry):
-            yield layer_name
+            stale.append(layer_name)
+    return stale


 def process_cfr_layers(stale_names, cfr_title, version_entry):
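Two distinct fixes here. First, at module scope the original loop left `doc_type`, `layer_name`, and `cls` bound in the module namespace after import; wrapping the construction in `_init_classes` keeps them local, as its docstring says. Second, `stale_layers` switches from a generator to returning a list, so callers get a re-iterable result. A minimal illustration of the leak (the names are made up):

```python
classes = {'ALL': {}, 'cfr': {}, 'preamble': {}}

for doc_type in classes:  # at module scope...
    pass
print(doc_type)           # ...the loop variable is still bound afterwards


def _build():
    for doc_type in classes:  # local: gone once the function returns
        pass
    return dict(classes)


LAYER_CLASSES = _build()
```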
3 changes: 1 addition & 2 deletions regparser/commands/preprocess_notice.py
@@ -56,8 +56,7 @@ def preprocess_notice(document_number):
         "regulation_id_numbers",
         "volume"
     ])
-    notice_xmls = list(notice_xmls_for_url(document_number,
-                                           meta['full_text_xml_url']))
+    notice_xmls = list(notice_xmls_for_url(meta['full_text_xml_url']))
     for notice_xml in notice_xmls:
         notice_xml.published = meta['publication_date']
         notice_xml.fr_volume = meta['volume']
6 changes: 3 additions & 3 deletions regparser/commands/retry.py
@@ -19,14 +19,14 @@
 def sub_commands():
     """Walk through the regparser.commands module looking for the presence of
     sub-commands"""
-    sub_commands = []
+    sub_cmds = []
     for _, command_name, _ in pkgutil.iter_modules(commands.__path__):
         # Note - this import will also discover DependencyResolvers
         module = import_module('regparser.commands.{}'.format(command_name))
         if hasattr(module, command_name):
-            sub_commands.append(
+            sub_cmds.append(
                 SubCommand(command_name, getattr(module, command_name)))
-    return sub_commands
+    return sub_cmds


 class RetryingCommand(click.MultiCommand):
12 changes: 6 additions & 6 deletions regparser/commands/sxs_layers.py
@@ -1,6 +1,7 @@
-import click
 import logging
+
+import click

 from regparser.index import dependency, entry
 from regparser.layer.section_by_section import SectionBySection

@@ -41,13 +42,12 @@ def sxs_layers(cfr_title, cfr_part):
     """Build SxS layers for all known versions."""
     logger.info("Build SxS layers - %s CFR %s", cfr_title, cfr_part)

-    tree_dir = entry.Tree(cfr_title, cfr_part)
-    for version_id in tree_dir:
+    for tree_entry in entry.Tree(cfr_title, cfr_part).sub_entries():
+        version_id = tree_entry.path[-1]
         if is_stale(cfr_title, cfr_part, version_id):
-            tree = (tree_dir / version_id).read()
+            tree = tree_entry.read()
             notices = [sxs.read() for sxs in previous_sxs(
                 cfr_title, cfr_part, version_id)]
             layer_json = SectionBySection(tree, notices).build()
-            entry.Layer.cfr(
-                cfr_title, cfr_part, version_id, 'analyses').write(
+            entry.Layer.cfr(cfr_title, cfr_part, version_id, 'analyses').write(
                 layer_json)
18 changes: 9 additions & 9 deletions regparser/commands/versions.py
@@ -36,24 +36,24 @@ def fetch_version_ids(cfr_title, cfr_part, notice_dir):
 def delays(xmls):
     """Find all changes to effective dates. Return the latest change to each
     version of the regulation"""
-    delays = {}
+    delay_map = {}
     # Sort so that later modifications override earlier ones
     for delayer in sorted(xmls, key=attrgetter('published')):
         for delay in delayer.delays():
             for delayed in filter(delay.modifies_notice_xml, xmls):
-                delays[delayed.version_id] = Delay(delayer.version_id,
-                                                   delay.delayed_until)
-    return delays
+                delay_map[delayed.version_id] = Delay(delayer.version_id,
+                                                      delay.delayed_until)
+    return delay_map


-def generate_dependencies(version_dir, version_ids, delays):
+def generate_dependencies(version_dir, version_ids, delays_by_version):
     """Creates a dependency graph and adds all dependencies for input xml and
     delays between notices"""
     notice_dir = entry.Notice()
     deps = dependency.Graph()
     for version_id in version_ids:
         deps.add(version_dir / version_id, notice_dir / version_id)
-    for delayed, delay in delays.items():
+    for delayed, delay in delays_by_version.items():
         deps.add(version_dir / delayed, notice_dir / delay.by)
     return deps

@@ -75,18 +75,18 @@ def write_to_disk(xml, version_entry, delay=None):
     version_entry.write(version)


-def write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays):
+def write_if_needed(cfr_title, cfr_part, version_ids, xmls, delays_by_version):
     """All versions which are stale (either because they were never create or
     because their dependency has been updated) are written to disk. If any
     dependency is missing, an exception is raised"""
     version_dir = entry.FinalVersion(cfr_title, cfr_part)
-    deps = generate_dependencies(version_dir, version_ids, delays)
+    deps = generate_dependencies(version_dir, version_ids, delays_by_version)
     for version_id in version_ids:
         version_entry = version_dir / version_id
         deps.validate_for(version_entry)
         if deps.is_stale(version_entry):
             write_to_disk(xmls[version_id], version_entry,
-                          delays.get(version_id))
+                          delays_by_version.get(version_id))


 @click.command()
9 changes: 5 additions & 4 deletions regparser/commands/write_to.py
@@ -1,6 +1,7 @@
-import click
 import logging
+
+import click

 from regparser.api_writer import Client
 from regparser.commands import utils
 from regparser.history.versions import Version
@@ -14,7 +15,7 @@
 def write_trees(client, only_title, only_part):
     for tree_entry in utils.relevant_paths(entry.Tree(), only_title,
                                            only_part):
-        cfr_title, cfr_part, version_id = tree_entry.path
+        _, cfr_part, version_id = tree_entry.path
         content = tree_entry.read()
         client.regulation(cfr_part, version_id).write(content)

@@ -25,7 +26,7 @@ def write_layers(client, only_title, only_part):
     layers."""
     for layer_entry in utils.relevant_paths(entry.Layer.cfr(), only_title,
                                             only_part):
-        _, cfr_title, cfr_part, version_id, layer_name = layer_entry.path
+        _, _, cfr_part, version_id, layer_name = layer_entry.path
         layer = layer_entry.read()
         doc_id = version_id + '/' + cfr_part
         client.layer(layer_name, 'cfr', doc_id).write(layer)
@@ -80,7 +81,7 @@ def write_notices(client, only_title, only_part):
 def write_diffs(client, only_title, only_part):
     for diff_entry in utils.relevant_paths(entry.Diff(), only_title,
                                            only_part):
-        cfr_title, cfr_part, lhs_id, rhs_id = diff_entry.path
+        _, cfr_part, lhs_id, rhs_id = diff_entry.path
         diff = diff_entry.read()
         client.diff(cfr_part, lhs_id, rhs_id).write(diff)
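The underscore substitutions silence pylint's unused-variable (W0612): values such as `cfr_title` were unpacked and never read, and `_` is the conventional name for a deliberately discarded binding. For example (the path tuple is invented):

```python
path = ('478', '11', '2016-12345')  # hypothetical (title, part, version) path
_, cfr_part, version_id = path      # the title is deliberately discarded
print(cfr_part, version_id)
```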
2 changes: 1 addition & 1 deletion regparser/diff/tree.py
@@ -9,7 +9,7 @@
 MODIFIED = 'modified'
 DELETED = 'deleted'

-_whitespace = re.compile(u'\s', re.UNICODE)  # aware of "thin" spaces, etc.
+_whitespace = re.compile(u'\\s', re.UNICODE)  # aware of "thin" spaces, etc.


 def _local_text_changes(lhs, rhs):
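Same backslash warning as in fetch_annual_edition.py, fixed here by doubling rather than with a raw string: `'\\s'` passes a literal `\s` to the regex engine. The `re.UNICODE` flag is what makes `\s` cover the "thin" spaces the comment mentions:

```python
import re

_whitespace = re.compile(u'\\s', re.UNICODE)
assert _whitespace.match(u' ')       # ordinary space
assert _whitespace.match(u'\u2009')  # THIN SPACE, matched under re.UNICODE
```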
7 changes: 4 additions & 3 deletions regparser/federalregister.py
@@ -1,10 +1,11 @@
 """Fetch data from the Federal Register
+
+See https://www.federalregister.gov/developers/api/v1 - GET "search" method
+"""
 import logging

 from regparser.index.http_cache import http_client

-'''
-See https://www.federalregister.gov/developers/api/v1 - GET "search" method
-'''
+
 FR_BASE = "https://www.federalregister.gov"
 API_BASE = FR_BASE + "/api/v1/"
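Folding the stray string into the module docstring fixes pointless-string-statement (W0105): only a string literal appearing as a module's first statement becomes its `__doc__`; a bare string later in the file is evaluated and thrown away. Schematically:

```python
"""Fetch data from the Federal Register

See https://www.federalregister.gov/developers/api/v1 - GET "search" method
"""
import logging

# A bare '''...''' block down here would be a no-op statement, invisible
# to help() and to module.__doc__.
```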
4 changes: 2 additions & 2 deletions regparser/grammar/amdpar.py
@@ -136,8 +136,8 @@ def _paren_from_match(match):
 ).setParseAction(
     lambda m: tokens.Context(
         [None, 'Interpretations', None,
-         _paren_join([m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6])
-         ], bool(m.certain)))
+         _paren_join([m.p2, m.p3, m.p4, m.plaintext_p5, m.plaintext_p6])],
+        bool(m.certain)))
 appendix = (
     context_certainty +
     unified.marker_appendix +
4 changes: 2 additions & 2 deletions regparser/grammar/delays.py
@@ -30,7 +30,7 @@ class Delayed:

 effective_date = (
     utils.Marker("effective") + utils.Marker("date")
-).setParseAction(lambda: EffectiveDate())
+).setParseAction(EffectiveDate)


 notice_citation = (
@@ -40,7 +40,7 @@
 ).setParseAction(lambda m: Notice(int(m[0]), int(m[1])))


-delayed = utils.Marker("delayed").setParseAction(lambda: Delayed())
+delayed = utils.Marker("delayed").setParseAction(Delayed)


 def int2Month(m):
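`lambda: EffectiveDate()` only forwards to the constructor, which pylint reports as unnecessary-lambda (W0108). pyparsing trims the usual `(string, location, tokens)` parse-action arguments to fit the callable, so passing the class directly should behave the same. A sketch under that assumption, with `CaselessLiteral` standing in for the module's `utils.Marker` (not shown in this diff):

```python
from pyparsing import CaselessLiteral


class Delayed(object):
    """Stand-in for the module's token class."""


delayed = CaselessLiteral("delayed").setParseAction(Delayed)
result = delayed.parseString("delayed")[0]
assert isinstance(result, Delayed)  # the action's return value replaces the token
```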
11 changes: 6 additions & 5 deletions regparser/grammar/tokens.py
@@ -22,11 +22,12 @@ class Token(object):
     def match(self, *types, **fields):
         """Pattern match. self must be one of the types provided (if they
         were provided) and all of the fields must match (if fields were
-        provided)"""
-        return ((not types or any(isinstance(self, typ) for typ in types)) and
-                (not fields or all(hasattr(self, f) for f in fields)) and
-                (not fields or all(getattr(self, f) ==
-                                   v for f, v in fields.items())))
+        provided). If a successful match, returns self"""
+        type_match = not types or any(isinstance(self, typ) for typ in types)
+        has_fields = not fields or all(hasattr(self, f) for f in fields)
+        fields_match = not has_fields or all(
+            getattr(self, f) == v for f, v in fields.items())
+        return type_match and has_fields and fields_match and self

     def copy(self, **fields):
         """Helper method to create a new instance of this token with the
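The rewritten `Token.match` keeps its truthiness contract but now returns `self` on success, so a caller can test for a match and capture the token in one expression. A self-contained sketch of the semantics:

```python
class Token(object):
    def match(self, *types, **fields):
        type_match = not types or any(isinstance(self, typ) for typ in types)
        has_fields = not fields or all(hasattr(self, f) for f in fields)
        fields_match = not has_fields or all(
            getattr(self, f) == v for f, v in fields.items())
        return type_match and has_fields and fields_match and self


class Context(Token):
    def __init__(self, certain):
        self.certain = certain


token = Context(certain=True)
assert token.match(Context, certain=True) is token   # success returns the token
assert token.match(Context, certain=False) is False  # failure stays falsy
```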