diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..f8b56708 --- /dev/null +++ b/.gitignore @@ -0,0 +1,79 @@ +# Copyright (c) 2014 GitHub, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +doc/_build/ + +# PyBuilder +target/ diff --git a/lint b/lint index b992ce69..76785df5 100755 --- a/lint +++ b/lint @@ -1,2 +1,3 @@ -#!/bin/sh -python -m lint_lib.lint +#!/usr/bin/env python +import lint_lib.lint as lint +lint.main() diff --git a/lint_lib/_vendor/Makefile b/lint_lib/_vendor/Makefile new file mode 100644 index 00000000..175e4166 --- /dev/null +++ b/lint_lib/_vendor/Makefile @@ -0,0 +1,13 @@ +all: clean vendor + +clean: + @# Delete vendored items + find . -maxdepth 1 -mindepth 1 -type d -exec rm -rf {} \; + +vendor: + @# Install vendored libraries + pip install -t . 
-r vendor.txt + + @# Cleanup .egg-info directories + rm -rf *.egg-info + rm -rf *.dist-info diff --git a/lint_lib/_vendor/__init__.py b/lint_lib/_vendor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lint_lib/_vendor/funcparserlib/__init__.py b/lint_lib/_vendor/funcparserlib/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lint_lib/_vendor/funcparserlib/lexer.py b/lint_lib/_vendor/funcparserlib/lexer.py new file mode 100644 index 00000000..96cbd988 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/lexer.py @@ -0,0 +1,133 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2008/2013 Andrey Vlasovskikh +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +__all__ = ['make_tokenizer', 'Token', 'LexerError'] + +import re + + +class LexerError(Exception): + def __init__(self, place, msg): + self.place = place + self.msg = msg + + def __str__(self): + s = u'cannot tokenize data' + line, pos = self.place + return u'%s: %d,%d: "%s"' % (s, line, pos, self.msg) + + +class Token(object): + def __init__(self, type, value, start=None, end=None): + self.type = type + self.value = value + self.start = start + self.end = end + + def __repr__(self): + return u'Token(%r, %r)' % (self.type, self.value) + + def __eq__(self, other): + # FIXME: Case sensitivity is assumed here + return self.type == other.type and self.value == other.value + + def _pos_str(self): + if self.start is None or self.end is None: + return '' + else: + sl, sp = self.start + el, ep = self.end + return u'%d,%d-%d,%d:' % (sl, sp, el, ep) + + def __str__(self): + s = u"%s %s '%s'" % (self._pos_str(), self.type, self.value) + return s.strip() + + @property + def name(self): + return self.value + + def pformat(self): + return u"%s %s '%s'" % (self._pos_str().ljust(20), + self.type.ljust(14), + self.value) + + +def make_tokenizer(specs): + """[(str, (str, int?))] -> (str -> Iterable(Token))""" + + def compile_spec(spec): + name, args = spec + return name, re.compile(*args) + + compiled = [compile_spec(s) for s in specs] + + def match_specs(specs, str, i, position): + line, pos = position + for type, regexp in specs: + m = regexp.match(str, i) + if m is not None: + value = m.group() + nls = value.count(u'\n') + n_line = line + nls + if nls == 0: + n_pos = pos + len(value) + else: + n_pos = len(value) - value.rfind(u'\n') - 1 + return Token(type, value, (line, pos + 1), (n_line, n_pos)) + else: + errline = str.splitlines()[line - 1] + raise LexerError((line, pos + 1), errline) + + def f(str): + 
length = len(str) + line, pos = 1, 0 + i = 0 + while i < length: + t = match_specs(compiled, str, i, (line, pos)) + yield t + line, pos = t.end + i += len(t.value) + + return f + +# This is an example of a token spec. See also [this article][1] for a +# discussion of searching for multiline comments using regexps (including `*?`). +# +# [1]: http://ostermiller.org/findcomment.html +_example_token_specs = [ + ('COMMENT', (r'\(\*(.|[\r\n])*?\*\)', re.MULTILINE)), + ('COMMENT', (r'\{(.|[\r\n])*?\}', re.MULTILINE)), + ('COMMENT', (r'//.*',)), + ('NL', (r'[\r\n]+',)), + ('SPACE', (r'[ \t\r\n]+',)), + ('NAME', (r'[A-Za-z_][A-Za-z_0-9]*',)), + ('REAL', (r'[0-9]+\.[0-9]*([Ee][+\-]?[0-9]+)*',)), + ('INT', (r'[0-9]+',)), + ('INT', (r'\$[0-9A-Fa-f]+',)), + ('OP', (r'(\.\.)|(<>)|(<=)|(>=)|(:=)|[;,=\(\):\[\]\.+\-<>\*/@\^]',)), + ('STRING', (r"'([^']|(''))*'",)), + ('CHAR', (r'#[0-9]+',)), + ('CHAR', (r'#\$[0-9A-Fa-f]+',)), +] +#tokenize = make_tokenizer(_example_token_specs) diff --git a/lint_lib/_vendor/funcparserlib/parser.py b/lint_lib/_vendor/funcparserlib/parser.py new file mode 100644 index 00000000..ab7af356 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/parser.py @@ -0,0 +1,408 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2008/2013 Andrey Vlasovskikh +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +"""A recurisve descent parser library based on functional combinators. + +Basic combinators are taken from Harrison's book ["Introduction to Functional +Programming"][1] and translated from ML into Python. See also [a Russian +translation of the book][2]. + + [1]: http://www.cl.cam.ac.uk/teaching/Lectures/funprog-jrh-1996/ + [2]: http://code.google.com/p/funprog-ru/ + +A parser `p` is represented by a function of type: + + p :: Sequence(a), State -> (b, State) + +that takes as its input a sequence of tokens of arbitrary type `a` and a +current parsing state and return a pair of a parsed token of arbitrary type +`b` and the new parsing state. + +The parsing state includes the current position in the sequence being parsed and +the position of the rightmost token that has been consumed while parsing. + +Parser functions are wrapped into an object of the class `Parser`. This class +implements custom operators `+` for sequential composition of parsers, `|` for +choice composition, `>>` for transforming the result of parsing. 
The method +`Parser.parse` provides an easier way for invoking a parser hiding details +related to a parser state: + + Parser.parse :: Parser(a, b), Sequence(a) -> b + +Altough this module is able to deal with a sequences of any kind of objects, the +recommended way of using it is applying a parser to a `Sequence(Token)`. +`Token` objects are produced by a regexp-based tokenizer defined in +`funcparserlib.lexer`. By using it this way you get more readable parsing error +messages (as `Token` objects contain their position in the source file) and good +separation of lexical and syntactic levels of the grammar. See examples for more +info. + +Debug messages are emitted via a `logging.Logger` object named +`"funcparserlib"`. +""" + +__all__ = [ + 'some', 'a', 'many', 'pure', 'finished', 'maybe', 'skip', 'oneplus', + 'forward_decl', 'NoParseError', +] + +import logging + +log = logging.getLogger('funcparserlib') + +debug = False + + +class Parser(object): + """A wrapper around a parser function that defines some operators for parser + composition. + """ + + def __init__(self, p): + """Wraps a parser function p into an object.""" + self.define(p) + + def named(self, name): + """Specifies the name of the parser for more readable parsing log.""" + self.name = name + return self + + def define(self, p): + """Defines a parser wrapped into this object.""" + f = getattr(p, 'run', p) + if debug: + setattr(self, '_run', f) + else: + setattr(self, 'run', f) + self.named(getattr(p, 'name', p.__doc__)) + + def run(self, tokens, s): + """Sequence(a), State -> (b, State) + + Runs a parser wrapped into this object. + """ + if debug: + log.debug(u'trying %s' % self.name) + return self._run(tokens, s) + + def _run(self, tokens, s): + raise NotImplementedError(u'you must define() a parser') + + def parse(self, tokens): + """Sequence(a) -> b + + Applies the parser to a sequence of tokens producing a parsing result. + + It provides a way to invoke a parser hiding details related to the + parser state. Also it makes error messages more readable by specifying + the position of the rightmost token that has been reached. + """ + try: + (tree, _) = self.run(tokens, State()) + return tree + except NoParseError, e: + max = e.state.max + if len(tokens) > max: + tok = tokens[max] + else: + tok = u'' + raise NoParseError(u'%s: %s' % (e.msg, tok), e.state) + + def __add__(self, other): + """Parser(a, b), Parser(a, c) -> Parser(a, _Tuple(b, c)) + + A sequential composition of parsers. + + NOTE: The real type of the parsed value isn't always such as specified. + Here we use dynamic typing for ignoring the tokens that are of no + interest to the user. Also we merge parsing results into a single _Tuple + unless the user explicitely prevents it. See also skip and >> + combinators. + """ + + def magic(v1, v2): + vs = [v for v in [v1, v2] if not isinstance(v, _Ignored)] + if len(vs) == 1: + return vs[0] + elif len(vs) == 2: + if isinstance(vs[0], _Tuple): + return _Tuple(v1 + (v2,)) + else: + return _Tuple(vs) + else: + return _Ignored(()) + + @Parser + def _add(tokens, s): + (v1, s2) = self.run(tokens, s) + (v2, s3) = other.run(tokens, s2) + return magic(v1, v2), s3 + + # or in terms of bind and pure: + # _add = self.bind(lambda x: other.bind(lambda y: pure(magic(x, y)))) + _add.name = u'(%s , %s)' % (self.name, other.name) + return _add + + def __or__(self, other): + """Parser(a, b), Parser(a, c) -> Parser(a, b or c) + + A choice composition of two parsers. + + NOTE: Here we are not providing the exact type of the result. 
In a + statically typed langage something like Either b c could be used. See + also + combinator. + """ + + @Parser + def _or(tokens, s): + try: + return self.run(tokens, s) + except NoParseError, e: + return other.run(tokens, State(s.pos, e.state.max)) + + _or.name = u'(%s | %s)' % (self.name, other.name) + return _or + + def __rshift__(self, f): + """Parser(a, b), (b -> c) -> Parser(a, c) + + Given a function from b to c, transforms a parser of b into a parser of + c. It is useful for transorming a parser value into another value for + making it a part of a parse tree or an AST. + + This combinator may be thought of as a functor from b -> c to Parser(a, + b) -> Parser(a, c). + """ + + @Parser + def _shift(tokens, s): + (v, s2) = self.run(tokens, s) + return f(v), s2 + + # or in terms of bind and pure: + # _shift = self.bind(lambda x: pure(f(x))) + _shift.name = u'(%s)' % (self.name,) + return _shift + + def bind(self, f): + """Parser(a, b), (b -> Parser(a, c)) -> Parser(a, c) + + NOTE: A monadic bind function. It is used internally to implement other + combinators. Functions bind and pure make the Parser a Monad. + """ + + @Parser + def _bind(tokens, s): + (v, s2) = self.run(tokens, s) + return f(v).run(tokens, s2) + + _bind.name = u'(%s >>=)' % (self.name,) + return _bind + + +class State(object): + """A parsing state that is maintained basically for error reporting. + + It consists of the current position pos in the sequence being parsed and + the position max of the rightmost token that has been consumed while + parsing. + """ + + def __init__(self, pos=0, max=0): + self.pos = pos + self.max = max + + def __str__(self): + return unicode((self.pos, self.max)) + + def __repr__(self): + return u'State(%r, %r)' % (self.pos, self.max) + + +class NoParseError(Exception): + def __init__(self, msg=u'', state=None): + self.msg = msg + self.state = state + + def __str__(self): + return self.msg + + +class _Tuple(tuple): + pass + + +class _Ignored(object): + def __init__(self, value): + self.value = value + + def __repr__(self): + return u'_Ignored(%s)' % repr(self.value) + + +@Parser +def finished(tokens, s): + """Parser(a, None) + + Throws an exception if any tokens are left in the input unparsed. + """ + if s.pos >= len(tokens): + return None, s + else: + raise NoParseError(u'should have reached ', s) + + +finished.name = u'finished' + + +def many(p): + """Parser(a, b) -> Parser(a, [b]) + + Returns a parser that infinitely applies the parser p to the input sequence + of tokens while it successfully parsers them. The resulting parser returns a + list of parsed values. + """ + + @Parser + def _many(tokens, s): + """Iterative implementation preventing the stack overflow.""" + res = [] + try: + while True: + (v, s) = p.run(tokens, s) + res.append(v) + except NoParseError, e: + return res, State(s.pos, e.state.max) + + _many.name = u'{ %s }' % p.name + return _many + + +def some(pred): + """(a -> bool) -> Parser(a, a) + + Returns a parser that parses a token if it satisfies a predicate pred. 
+ """ + + @Parser + def _some(tokens, s): + if s.pos >= len(tokens): + raise NoParseError(u'no tokens left in the stream', s) + else: + t = tokens[s.pos] + if pred(t): + pos = s.pos + 1 + s2 = State(pos, max(pos, s.max)) + if debug: + log.debug(u'*matched* "%s", new state = %s' % (t, s2)) + return t, s2 + else: + if debug: + log.debug(u'failed "%s", state = %s' % (t, s)) + raise NoParseError(u'got unexpected token', s) + + _some.name = u'(some)' + return _some + + +def a(value): + """Eq(a) -> Parser(a, a) + + Returns a parser that parses a token that is equal to the value value. + """ + name = getattr(value, 'name', value) + return some(lambda t: t == value).named(u'(a "%s")' % (name,)) + + +def pure(x): + @Parser + def _pure(_, s): + return x, s + + _pure.name = u'(pure %r)' % (x,) + return _pure + + +def maybe(p): + """Parser(a, b) -> Parser(a, b or None) + + Returns a parser that retuns None if parsing fails. + + NOTE: In a statically typed language, the type Maybe b could be more + approprieate. + """ + return (p | pure(None)).named(u'[ %s ]' % (p.name,)) + + +def skip(p): + """Parser(a, b) -> Parser(a, _Ignored(b)) + + Returns a parser which results are ignored by the combinator +. It is useful + for throwing away elements of concrete syntax (e. g. ",", ";"). + """ + return p >> _Ignored + + +def oneplus(p): + """Parser(a, b) -> Parser(a, [b]) + + Returns a parser that applies the parser p one or more times. + """ + q = p + many(p) >> (lambda x: [x[0]] + x[1]) + return q.named(u'(%s , { %s })' % (p.name, p.name)) + + +def with_forward_decls(suspension): + """(None -> Parser(a, b)) -> Parser(a, b) + + Returns a parser that computes itself lazily as a result of the suspension + provided. It is needed when some parsers contain forward references to + parsers defined later and such references are cyclic. See examples for more + details. + """ + + @Parser + def f(tokens, s): + return suspension().run(tokens, s) + + return f + + +def forward_decl(): + """None -> Parser(?, ?) + + Returns an undefined parser that can be used as a forward declaration. You + will be able to define() it when all the parsers it depends on are + available. + """ + + @Parser + def f(tokens, s): + raise NotImplementedError(u'you must define() a forward_decl somewhere') + + return f + + +if __name__ == '__main__': + import doctest + doctest.testmod() diff --git a/lint_lib/_vendor/funcparserlib/tests/__init__.py b/lint_lib/_vendor/funcparserlib/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/lint_lib/_vendor/funcparserlib/tests/dot.py b/lint_lib/_vendor/funcparserlib/tests/dot.py new file mode 100644 index 00000000..4ede9f41 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/tests/dot.py @@ -0,0 +1,205 @@ +# -*- coding: utf-8 -*- + +r"""A DOT language parser using funcparserlib. + +The parser is based on [the DOT grammar][1]. It is pretty complete with a few +not supported things: + +* String ecapes `\"` +* Ports and compass points +* XML identifiers + +At the moment, the parser builds only a parse tree, not an abstract syntax tree +(AST) or an API for dealing with DOT. 
+ + [1]: http://www.graphviz.org/doc/info/lang.html +""" + +import sys +import os +from re import MULTILINE +from funcparserlib.util import pretty_tree +from funcparserlib.lexer import make_tokenizer, Token, LexerError +from funcparserlib.parser import (some, a, maybe, many, finished, skip, + oneplus, forward_decl, NoParseError) + +try: + from collections import namedtuple +except ImportError: + # Basic implementation of namedtuple for 2.1 < Python < 2.6 + def namedtuple(name, fields): + """Only space-delimited fields are supported.""" + + def prop(i, name): + return name, property(lambda self: self[i]) + + def new(cls, *args, **kwargs): + args = list(args) + n = len(args) + for i in range(n, len(names)): + name = names[i - n] + args.append(kwargs[name]) + return tuple.__new__(cls, args) + + names = dict((i, f) for i, f in enumerate(fields.split(u' '))) + methods = dict(prop(i, f) for i, f in enumerate(fields.split(u' '))) + methods.update({ + '__new__': new, + '__repr__': lambda self: u'%s(%s)' % ( + name, + u', '.join(u'%s=%r' % ( + f, getattr(self, f)) for f in fields.split(u' ')))}) + return type(name, (tuple,), methods) + +ENCODING = u'UTF-8' + +Graph = namedtuple('Graph', 'strict type id stmts') +SubGraph = namedtuple('SubGraph', 'id stmts') +Node = namedtuple('Node', 'id attrs') +Attr = namedtuple('Attr', 'name value') +Edge = namedtuple('Edge', 'nodes attrs') +DefAttrs = namedtuple('DefAttrs', 'object attrs') + + +def tokenize(str): + """str -> Sequence(Token)""" + specs = [ + (u'Comment', (ur'/\*(.|[\r\n])*?\*/', MULTILINE)), + (u'Comment', (ur'//.*',)), + (u'NL', (ur'[\r\n]+',)), + (u'Space', (ur'[ \t\r\n]+',)), + (u'Name', (ur'[A-Za-z\200-\377_][A-Za-z\200-\377_0-9]*',)), + (u'Op', (ur'[{};,=\[\]]|(->)|(--)',)), + (u'Number', (ur'-?(\.[0-9]+)|([0-9]+(\.[0-9]*)?)',)), + (u'String', (ur'"[^"]*"',)), # '\"' escapes are ignored + ] + useless = [u'Comment', u'NL', u'Space'] + t = make_tokenizer(specs) + return [x for x in t(str) if x.type not in useless] + + +def parse(seq): + """Sequence(Token) -> object""" + unarg = lambda f: lambda args: f(*args) + tokval = lambda x: x.value + flatten = lambda list: sum(list, []) + n = lambda s: a(Token(u'Name', s)) >> tokval + op = lambda s: a(Token(u'Op', s)) >> tokval + op_ = lambda s: skip(op(s)) + id_types = [u'Name', u'Number', u'String'] + id = some(lambda t: t.type in id_types).named(u'id') >> tokval + make_graph_attr = lambda args: DefAttrs(u'graph', [Attr(*args)]) + make_edge = lambda x, xs, attrs: Edge([x] + xs, attrs) + + node_id = id # + maybe(port) + a_list = ( + id + + maybe(op_(u'=') + id) + + skip(maybe(op(u','))) + >> unarg(Attr)) + attr_list = ( + many(op_(u'[') + many(a_list) + op_(u']')) + >> flatten) + attr_stmt = ( + (n(u'graph') | n(u'node') | n(u'edge')) + + attr_list + >> unarg(DefAttrs)) + graph_attr = id + op_(u'=') + id >> make_graph_attr + node_stmt = node_id + attr_list >> unarg(Node) + # We use a forward_decl becaue of circular definitions like (stmt_list -> + # stmt -> subgraph -> stmt_list) + subgraph = forward_decl() + edge_rhs = skip(op(u'->') | op(u'--')) + (subgraph | node_id) + edge_stmt = ( + (subgraph | node_id) + + oneplus(edge_rhs) + + attr_list + >> unarg(make_edge)) + stmt = ( + attr_stmt + | edge_stmt + | subgraph + | graph_attr + | node_stmt + ) + stmt_list = many(stmt + skip(maybe(op(u';')))) + subgraph.define( + skip(n(u'subgraph')) + + maybe(id) + + op_(u'{') + + stmt_list + + op_(u'}') + >> unarg(SubGraph)) + graph = ( + maybe(n(u'strict')) + + maybe(n(u'graph') | n(u'digraph')) + + maybe(id) + + 
op_(u'{') + + stmt_list + + op_(u'}') + >> unarg(Graph)) + dotfile = graph + skip(finished) + + return dotfile.parse(seq) + + +def pretty_parse_tree(x): + """object -> str""" + Pair = namedtuple(u'Pair', u'first second') + p = lambda x, y: Pair(x, y) + + def kids(x): + """object -> list(object)""" + if isinstance(x, (Graph, SubGraph)): + return [p(u'stmts', x.stmts)] + elif isinstance(x, (Node, DefAttrs)): + return [p(u'attrs', x.attrs)] + elif isinstance(x, Edge): + return [p(u'nodes', x.nodes), p(u'attrs', x.attrs)] + elif isinstance(x, Pair): + return x.second + else: + return [] + + def show(x): + """object -> str""" + if isinstance(x, Pair): + return x.first + elif isinstance(x, Graph): + return u'Graph [id=%s, strict=%r, type=%s]' % ( + x.id, x.strict is not None, x.type) + elif isinstance(x, SubGraph): + return u'SubGraph [id=%s]' % (x.id,) + elif isinstance(x, Edge): + return u'Edge' + elif isinstance(x, Attr): + return u'Attr [name=%s, value=%s]' % (x.name, x.value) + elif isinstance(x, DefAttrs): + return u'DefAttrs [object=%s]' % (x.object,) + elif isinstance(x, Node): + return u'Node [id=%s]' % (x.id,) + else: + return unicode(x) + + return pretty_tree(x, kids, show) + + +def main(): + #import logging + #logging.basicConfig(level=logging.DEBUG) + #import funcparserlib + #funcparserlib.parser.debug = True + try: + stdin = os.fdopen(sys.stdin.fileno(), u'rb') + input = stdin.read().decode(ENCODING) + tree = parse(tokenize(input)) + #print pformat(tree) + print pretty_parse_tree(tree).encode(ENCODING) + except (NoParseError, LexerError), e: + msg = (u'syntax error: %s' % e).encode(ENCODING) + print >> sys.stderr, msg + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/lint_lib/_vendor/funcparserlib/tests/json.py b/lint_lib/_vendor/funcparserlib/tests/json.py new file mode 100644 index 00000000..0b45e831 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/tests/json.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- + +r"""A JSON parser using funcparserlib. + +The parser is based on [the JSON grammar][1]. + + [1]: http://tools.ietf.org/html/rfc4627 +""" + +import sys +import os +import re +import logging +from re import VERBOSE +from pprint import pformat +from funcparserlib.lexer import make_tokenizer, Token, LexerError +from funcparserlib.parser import (some, a, maybe, many, finished, skip, + forward_decl, NoParseError) + +ENCODING = u'UTF-8' +regexps = { + u'escaped': ur''' + \\ # Escape + ((?P["\\/bfnrt]) # Standard escapes + | (u(?P[0-9A-Fa-f]{4}))) # uXXXX + ''', + u'unescaped': ur''' + [^"\\] # Unescaped: avoid ["\\] + ''', +} +re_esc = re.compile(regexps[u'escaped'], VERBOSE) + + +def tokenize(str): + """str -> Sequence(Token)""" + specs = [ + (u'Space', (ur'[ \t\r\n]+',)), + (u'String', (ur'"(%(unescaped)s | %(escaped)s)*"' % regexps, VERBOSE)), + (u'Number', (ur''' + -? # Minus + (0|([1-9][0-9]*)) # Int + (\.[0-9]+)? # Frac + ([Ee][+-][0-9]+)? 
# Exp + ''', VERBOSE)), + (u'Op', (ur'[{}\[\]\-,:]',)), + (u'Name', (ur'[A-Za-z_][A-Za-z_0-9]*',)), + ] + useless = [u'Space'] + t = make_tokenizer(specs) + return [x for x in t(str) if x.type not in useless] + + +def parse(seq): + """Sequence(Token) -> object""" + const = lambda x: lambda _: x + tokval = lambda x: x.value + toktype = lambda t: some(lambda x: x.type == t) >> tokval + op = lambda s: a(Token(u'Op', s)) >> tokval + op_ = lambda s: skip(op(s)) + n = lambda s: a(Token(u'Name', s)) >> tokval + + def make_array(n): + if n is None: + return [] + else: + return [n[0]] + n[1] + + def make_object(n): + return dict(make_array(n)) + + def make_number(n): + try: + return int(n) + except ValueError: + return float(n) + + def unescape(s): + std = { + u'"': u'"', u'\\': u'\\', u'/': u'/', u'b': u'\b', u'f': u'\f', + u'n': u'\n', u'r': u'\r', u't': u'\t', + } + + def sub(m): + if m.group(u'standard') is not None: + return std[m.group(u'standard')] + else: + return unichr(int(m.group(u'unicode'), 16)) + + return re_esc.sub(sub, s) + + def make_string(n): + return unescape(n[1:-1]) + + null = n(u'null') >> const(None) + true = n(u'true') >> const(True) + false = n(u'false') >> const(False) + number = toktype(u'Number') >> make_number + string = toktype(u'String') >> make_string + value = forward_decl() + member = string + op_(u':') + value >> tuple + object = ( + op_(u'{') + + maybe(member + many(op_(u',') + member)) + + op_(u'}') + >> make_object) + array = ( + op_(u'[') + + maybe(value + many(op_(u',') + value)) + + op_(u']') + >> make_array) + value.define( + null + | true + | false + | object + | array + | number + | string) + json_text = object | array + json_file = json_text + skip(finished) + + return json_file.parse(seq) + + +def loads(s): + """str -> object""" + return parse(tokenize(s)) + + +def main(): + logging.basicConfig(level=logging.DEBUG) + try: + stdin = os.fdopen(sys.stdin.fileno(), 'rb') + input = stdin.read().decode(ENCODING) + tree = loads(input) + print pformat(tree) + except (NoParseError, LexerError), e: + msg = (u'syntax error: %s' % e).encode(ENCODING) + print >> sys.stderr, msg + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/lint_lib/_vendor/funcparserlib/tests/test_dot.py b/lint_lib/_vendor/funcparserlib/tests/test_dot.py new file mode 100644 index 00000000..89880194 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/tests/test_dot.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- + +import unittest +from funcparserlib.parser import NoParseError +from funcparserlib.lexer import LexerError +from dot import parse, tokenize, Graph, Edge, SubGraph, DefAttrs, Attr, Node + + +class DotTest(unittest.TestCase): + def t(self, data, expected=None): + self.assertEqual(parse(tokenize(data)), expected) + + def test_comments(self): + self.t(u''' + /* комм 1 */ + graph /* комм 4 */ g1 { + // комм 2 /* комм 3 */ + } + // комм 5 + ''', + Graph(strict=None, type=u'graph', id=u'g1', stmts=[])) + + def test_connected_subgraph(self): + self.t(u''' + digraph g1 { + n1 -> n2 -> + subgraph n3 { + nn1 -> nn2 -> nn3; + nn3 -> nn1; + }; + subgraph n3 {} -> n1; + } + ''', + Graph(strict=None, type=u'digraph', id=u'g1', stmts=[ + Edge( + nodes=[ + u'n1', + u'n2', + SubGraph(id=u'n3', stmts=[ + Edge( + nodes=[u'nn1', u'nn2', u'nn3'], + attrs=[]), + Edge( + nodes=[u'nn3', u'nn1'], + attrs=[])])], + attrs=[]), + Edge( + nodes=[ + SubGraph(id=u'n3', stmts=[]), + u'n1'], + attrs=[])])) + + def test_default_attrs(self): + self.t(u''' + digraph g1 { + page="3,3"; + graph [rotate=90]; 
+ node [shape=box, color="#0000ff"]; + edge [style=dashed]; + n1 -> n2 -> n3; + n3 -> n1; + } + ''', + Graph(strict=None, type=u'digraph', id=u'g1', stmts=[ + DefAttrs(object=u'graph', attrs=[ + Attr(name=u'page', value=u'"3,3"')]), + DefAttrs(object=u'graph', attrs=[ + Attr(name=u'rotate', value=u'90')]), + DefAttrs(object=u'node', attrs=[ + Attr(name=u'shape', value=u'box'), + Attr(name=u'color', value=u'"#0000ff"')]), + DefAttrs(object=u'edge', attrs=[ + Attr(name=u'style', value=u'dashed')]), + Edge(nodes=[u'n1', u'n2', u'n3'], attrs=[]), + Edge(nodes=[u'n3', u'n1'], attrs=[])])) + + def test_empty_graph(self): + self.t(u''' + graph g1 {} + ''', + Graph(strict=None, type=u'graph', id=u'g1', stmts=[])) + + def test_few_attrs(self): + self.t(u''' + digraph g1 { + n1 [attr1, attr2 = value2]; + } + ''', + Graph(strict=None, type=u'digraph', id=u'g1', stmts=[ + Node(id=u'n1', attrs=[ + Attr(name=u'attr1', value=None), + Attr(name=u'attr2', value=u'value2')])])) + + def test_few_nodes(self): + self.t(u''' + graph g1 { + n1; + n2; + n3 + } + ''', + Graph(strict=None, type=u'graph', id=u'g1', stmts=[ + Node(id=u'n1', attrs=[]), + Node(id=u'n2', attrs=[]), + Node(id=u'n3', attrs=[])])) + + def test_illegal_comma(self): + try: + self.t(u''' + graph g1 { + n1; + n2; + n3, + } + ''') + except NoParseError: + pass + else: + self.fail('must raise NoParseError') + + def test_null(self): + try: + self.t(u'') + except NoParseError: + pass + else: + self.fail('must raise NoParseError') + + def test_simple_cycle(self): + self.t(u''' + digraph g1 { + n1 -> n2 [w=5]; + n2 -> n3 [w=10]; + n3 -> n1 [w=7]; + } + ''', + Graph(strict=None, type=u'digraph', id=u'g1', stmts=[ + Edge(nodes=[u'n1', u'n2'], attrs=[ + Attr(name=u'w', value=u'5')]), + Edge(nodes=[u'n2', u'n3'], attrs=[ + Attr(name=u'w', value=u'10')]), + Edge(nodes=[u'n3', u'n1'], attrs=[ + Attr(name=u'w', value=u'7')])])) + + def test_single_unicode_char(self): + try: + self.t(u'ф') + except LexerError: + pass + else: + self.fail('must raise LexerError') + + def test_unicode_names(self): + self.t(u''' + digraph g1 { + n1 -> "Медведь" [label="Поехали!"]; + "Медведь" -> n3 [label="Добро пожаловать!"]; + n3 -> n1 ["Водка"="Селёдка"]; + } + ''', + Graph(strict=None, type=u'digraph', id=u'g1', stmts=[ + Edge(nodes=[u'n1', u'"Медведь"'], attrs=[ + Attr(name=u'label', value=u'"Поехали!"')]), + Edge(nodes=[u'"Медведь"', u'n3'], attrs=[ + Attr(name=u'label', value=u'"Добро пожаловать!"')]), + Edge(nodes=[u'n3', u'n1'], attrs=[ + Attr(name=u'"Водка"', value=u'"Селёдка"')])])) diff --git a/lint_lib/_vendor/funcparserlib/tests/test_json.py b/lint_lib/_vendor/funcparserlib/tests/test_json.py new file mode 100644 index 00000000..f0b225aa --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/tests/test_json.py @@ -0,0 +1,97 @@ +# -*- coding: utf-8 -*- + +import unittest +from funcparserlib.parser import NoParseError +from funcparserlib.lexer import LexerError +import json + + +class JsonTest(unittest.TestCase): + def t(self, data, expected=None): + self.assertEqual(json.loads(data), expected) + + def test_1_array(self): + self.t(u'[1]', [1]) + + def test_1_object(self): + self.t(u'{"foo": "bar"}', {u'foo': u'bar'}) + + def test_bool_and_null(self): + self.t(u'[null, true, false]', [None, True, False]) + + def test_empty_array(self): + self.t(u'[]', []) + + def test_empty_object(self): + self.t(u'{}', {}) + + def test_many_array(self): + self.t(u'[1, 2, [3, 4, 5], 6]', [1, 2, [3, 4, 5], 6]) + + def test_many_object(self): + self.t(u''' + { + "foo": 1, + "bar": + { + 
"baz": 2, + "quux": [true, false], + "{}": {} + }, + "spam": "eggs" + } + ''', { + u'foo': 1, + u'bar': { + u'baz': 2, + u'quux': [True, False], + u'{}': {}, + }, + u'spam': u'eggs', + }) + + def test_null(self): + try: + self.t(u'') + except NoParseError: + pass + else: + self.fail('must raise NoParseError') + + def test_numbers(self): + self.t(u'''\ + [ + 0, 1, -1, 14, -14, 65536, + 0.0, 3.14, -3.14, -123.456, + 6.67428e-11, -1.602176e-19, 6.67428E-11 + ] + ''', [ + 0, 1, -1, 14, -14, 65536, + 0.0, 3.14, -3.14, -123.456, + 6.67428e-11, -1.602176e-19, 6.67428E-11, + ]) + + def test_strings(self): + self.t(ur''' + [ + ["", "hello", "hello world!"], + ["привет, мир!", "λx.x"], + ["\"", "\\", "\/", "\b", "\f", "\n", "\r", "\t"], + ["\u0000", "\u03bb", "\uffff", "\uFFFF"], + ["вот функция идентичности:\nλx.x\nили так:\n\u03bbx.x"] + ] + ''', [ + [u'', u'hello', u'hello world!'], + [u'привет, мир!', u'λx.x'], + [u'"', u'\\', u'/', u'\x08', u'\x0c', u'\n', u'\r', u'\t'], + [u'\u0000', u'\u03bb', u'\uffff', u'\uffff'], + [u'вот функция идентичности:\nλx.x\nили так:\n\u03bbx.x'], + ]) + + def test_toplevel_string(self): + try: + self.t(u'неправильно') + except LexerError: + pass + else: + self.fail('must raise LexerError') diff --git a/lint_lib/_vendor/funcparserlib/tests/test_parsing.py b/lint_lib/_vendor/funcparserlib/tests/test_parsing.py new file mode 100644 index 00000000..4c71384a --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/tests/test_parsing.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import unittest +from funcparserlib.lexer import make_tokenizer, LexerError, Token +from funcparserlib.parser import a, many, some, skip, NoParseError + + +class ParsingTest(unittest.TestCase): + # Issue 31 + def test_many_backtracking(self): + x = a(u'x') + y = a(u'y') + expr = many(x + y) + x + x + self.assertEqual(expr.parse(u'xyxyxx'), + ([(u'x', u'y'), (u'x', u'y')], u'x', u'x')) + + # Issue 14 + def test_error_info(self): + tokenize = make_tokenizer([ + (u'keyword', (ur'(is|end)',)), + (u'id', (ur'[a-z]+',)), + (u'space', (ur'[ \t]+',)), + (u'nl', (ur'[\n\r]+',)), + ]) + try: + list(tokenize(u'f is ф')) + except LexerError, e: + self.assertEqual(unicode(e), + u'cannot tokenize data: 1,6: "f is \u0444"') + else: + self.fail(u'must raise LexerError') + + sometok = lambda type: some(lambda t: t.type == type) + keyword = lambda s: a(Token(u'keyword', s)) + + id = sometok(u'id') + is_ = keyword(u'is') + end = keyword(u'end') + nl = sometok(u'nl') + + equality = id + skip(is_) + id >> tuple + expr = equality + skip(nl) + file = many(expr) + end + + msg = """\ +spam is eggs +eggs isnt spam +end""" + toks = [x for x in tokenize(msg) if x.type != u'space'] + try: + file.parse(toks) + except NoParseError, e: + self.assertEqual(e.msg, + u"got unexpected token: 2,11-2,14: id 'spam'") + self.assertEqual(e.state.pos, 4) + self.assertEqual(e.state.max, 7) + # May raise KeyError + t = toks[e.state.max] + self.assertEqual(t, Token(u'id', u'spam')) + self.assertEqual((t.start, t.end), ((2, 11), (2, 14))) + else: + self.fail(u'must raise NoParseError') diff --git a/lint_lib/_vendor/funcparserlib/util.py b/lint_lib/_vendor/funcparserlib/util.py new file mode 100644 index 00000000..8a510bd2 --- /dev/null +++ b/lint_lib/_vendor/funcparserlib/util.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2008/2013 Andrey Vlasovskikh +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the +# "Software"), to deal in the 
Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +def pretty_tree(x, kids, show): + """(a, (a -> list(a)), (a -> str)) -> str + + Returns a pseudographic tree representation of x similar to the tree command + in Unix. + """ + (MID, END, CONT, LAST, ROOT) = (u'|-- ', u'`-- ', u'| ', u' ', u'') + + def rec(x, indent, sym): + line = indent + sym + show(x) + xs = kids(x) + if len(xs) == 0: + return line + else: + if sym == MID: + next_indent = indent + CONT + elif sym == ROOT: + next_indent = indent + ROOT + else: + next_indent = indent + LAST + syms = [MID] * (len(xs) - 1) + [END] + lines = [rec(x, next_indent, sym) for x, sym in zip(xs, syms)] + return u'\n'.join([line] + lines) + + return rec(x, u'', ROOT) diff --git a/lint_lib/_vendor/vendor.txt b/lint_lib/_vendor/vendor.txt new file mode 100644 index 00000000..e681e601 --- /dev/null +++ b/lint_lib/_vendor/vendor.txt @@ -0,0 +1 @@ +funcparserlib==0.3.6 diff --git a/lint_lib/lint.py b/lint_lib/lint.py index d0f4b94b..4d7791e4 100644 --- a/lint_lib/lint.py +++ b/lint_lib/lint.py @@ -8,7 +8,7 @@ from collections import Counter, OrderedDict from os.path import dirname, join, pardir, relpath -from funcparserlib.parser import NoParseError +from ._vendor.funcparserlib.parser import NoParseError from . import parser @@ -153,10 +153,16 @@ def lint_tree_construction_test(path): parsed = lint_dat_format(path, "utf-8", "data") if not parsed: return + seen = set() for test in parsed: if not is_subsequence(list(test.keys()), ["data", "errors", "document-fragment", "script-off", "script-on", "document"]): print("unexpected test headings %r in %s" % (test.keys(), path)) + continue + items = tuple(test.items()) + if items in seen: + print("Duplicate test %r in %s" % (items, path)) + seen.add(items) def lint_tree_construction_tests(path): @@ -167,7 +173,11 @@ def lint_tree_construction_tests(path): lint_tree_construction_test(clean_path(join(root, file))) -if __name__ == "__main__": +def main(): lint_encoding_tests(join(base, "encoding")) lint_tokenizer_tests(join(base, "tokenizer")) lint_tree_construction_tests(join(base, "tree-construction")) + + +if __name__ == "__main__": + main() diff --git a/lint_lib/parser.py b/lint_lib/parser.py index 312b69cf..f272b4f0 100644 --- a/lint_lib/parser.py +++ b/lint_lib/parser.py @@ -2,9 +2,9 @@ import re -from funcparserlib.lexer import Token, LexerError -from funcparserlib.parser import (Parser, State, NoParseError, - finished, many, pure, skip, some) +from ._vendor.funcparserlib.lexer import Token, LexerError +from ._vendor.funcparserlib.parser import (Parser, State, NoParseError, + finished, many, pure, skip, some) text_type = type("") binary_type = type(b"")
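A minimal usage sketch of the vendored combinators, for reference: the grammar, token names, and parse_sum helper below are illustrative assumptions, not code from this patch. It assumes the repository root is on sys.path (so the lint_lib._vendor package added above is importable) and a Python 2 interpreter, which the vendored funcparserlib 0.3.6 sources target.

    # Sketch only: sum an expression like "1 + 2 + 3" with the vendored lexer
    # and parser combinators.  Token names (Space, Int, Op) and parse_sum are
    # illustrative assumptions, not part of lint_lib.
    from lint_lib._vendor.funcparserlib.lexer import make_tokenizer, Token
    from lint_lib._vendor.funcparserlib.parser import a, some, many, skip, finished

    tokenize = make_tokenizer([
        (u'Space', (r'[ \t]+',)),   # produced by the lexer, filtered out below
        (u'Int', (r'[0-9]+',)),
        (u'Op', (r'\+',)),
    ])

    def parse_sum(text):
        # make_tokenizer returns a generator; Parser.parse needs an indexable list
        tokens = [t for t in tokenize(text) if t.type != u'Space']
        number = some(lambda t: t.type == u'Int') >> (lambda t: int(t.value))
        plus = skip(a(Token(u'Op', u'+')))   # matched but dropped from the result
        expr = number + many(plus + number) >> (lambda pair: sum([pair[0]] + pair[1]))
        return (expr + skip(finished)).parse(tokens)

    print(parse_sum(u'1 + 2 + 3'))  # 6

The skip and >> combinators keep punctuation and raw Token objects out of the parse result, the same pattern used by the dot.py and json.py examples shipped with the vendored tests and by the imports in lint_lib/parser.py above.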