From 6645530952e50ed13ac51fc012ee420cff64dfaa Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 20 Oct 2019 12:03:37 -0400 Subject: [PATCH 1/5] Do not create line-specific parsers for requirements files This clears the way and for us to create our parser outside the function next. --- src/pip/_internal/req/req_file.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 47486fa1fbc..4397538aa14 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -167,7 +167,7 @@ def process_line( :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ - parser = build_parser(line) + parser = build_parser() defaults = parser.get_default_values() defaults.index_url = None if finder: @@ -177,9 +177,14 @@ def process_line( if sys.version_info < (2, 7, 3): # https://github.com/python/mypy/issues/1174 options_str = options_str.encode('utf8') # type: ignore - # https://github.com/python/mypy/issues/1174 - opts, _ = parser.parse_args( - shlex.split(options_str), defaults) # type: ignore + try: + # https://github.com/python/mypy/issues/1174 + opts, _ = parser.parse_args( + shlex.split(options_str), defaults) # type: ignore + except OptionParsingError as e: + # add offending line + msg = 'Invalid requirement: %s\n%s' % (line, e.msg) + raise RequirementsFileParseError(msg) # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( @@ -297,8 +302,14 @@ def break_args_options(line): return ' '.join(args), ' '.join(options) # type: ignore -def build_parser(line): - # type: (Text) -> optparse.OptionParser +class OptionParsingError(Exception): + def __init__(self, msg): + # type: (str) -> None + self.msg = msg + + +def build_parser(): + # type: () -> optparse.OptionParser """ Return a parser for parsing requirement lines """ @@ -313,9 +324,7 @@ def build_parser(line): # that in our own exception. def parser_exit(self, msg): # type: (Any, str) -> NoReturn - # add offending line - msg = 'Invalid requirement: %s\n%s' % (line, msg) - raise RequirementsFileParseError(msg) + raise OptionParsingError(msg) # NOTE: mypy disallows assigning to a method # https://github.com/python/mypy/issues/2427 parser.exit = parser_exit # type: ignore From f0b20f19aed51c451c529c7ec354f4f3ed0c0fc0 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 20 Oct 2019 12:24:24 -0400 Subject: [PATCH 2/5] Hide line parsing details behind a line parser Simplifies reading the code that actually processes the line. --- src/pip/_internal/req/req_file.py | 45 ++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 4397538aa14..79fd883ab33 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -31,9 +31,11 @@ from pip._internal.utils.urls import get_url_scheme if MYPY_CHECK_RUNNING: + from optparse import Values from typing import ( Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple, ) + from pip._internal.req import InstallRequirement from pip._internal.cache import WheelCache from pip._internal.index.package_finder import PackageFinder @@ -41,6 +43,9 @@ ReqFileLines = Iterator[Tuple[int, Text]] + LineParser = Callable[[Text], Tuple[str, Values]] + + __all__ = ['parse_requirements'] SCHEME_RE = re.compile(r'^(http|https|file):', re.I) @@ -167,20 +172,9 @@ def process_line( :param constraint: If True, parsing a constraints file. :param options: OptionParser options that we may update """ - parser = build_parser() - defaults = parser.get_default_values() - defaults.index_url = None - if finder: - defaults.format_control = finder.format_control - args_str, options_str = break_args_options(line) - # Prior to 2.7.3, shlex cannot deal with unicode entries - if sys.version_info < (2, 7, 3): - # https://github.com/python/mypy/issues/1174 - options_str = options_str.encode('utf8') # type: ignore + line_parser = get_line_parser(finder) try: - # https://github.com/python/mypy/issues/1174 - opts, _ = parser.parse_args( - shlex.split(options_str), defaults) # type: ignore + args_str, opts = line_parser(line) except OptionParsingError as e: # add offending line msg = 'Invalid requirement: %s\n%s' % (line, e.msg) @@ -284,6 +278,31 @@ def process_line( session.add_trusted_host(host, source=source) +def get_line_parser(finder): + # type: (Optional[PackageFinder]) -> LineParser + parser = build_parser() + defaults = parser.get_default_values() + defaults.index_url = None + if finder: + defaults.format_control = finder.format_control + + def parse_line(line): + # type: (Text) -> Tuple[str, Values] + args_str, options_str = break_args_options(line) + # Prior to 2.7.3, shlex cannot deal with unicode entries + if sys.version_info < (2, 7, 3): + # https://github.com/python/mypy/issues/1174 + options_str = options_str.encode('utf8') # type: ignore + + # https://github.com/python/mypy/issues/1174 + opts, _ = parser.parse_args( + shlex.split(options_str), defaults) # type: ignore + + return args_str, opts + + return parse_line + + def break_args_options(line): # type: (Text) -> Tuple[str, Text] """Break up the line into an args and options string. We only want to shlex From 4d7fc272b42a9be9fac176113342788024d20189 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 20 Oct 2019 12:45:10 -0400 Subject: [PATCH 3/5] Remove no-action TODO comes_from is only used in get_file_content, which expects to see a URL or path, so there is no need to decorate it. --- src/pip/_internal/req/req_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 79fd883ab33..2b57bd07f2e 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -232,7 +232,6 @@ def process_line( elif not SCHEME_RE.search(req_path): # do a join so relative paths work req_path = os.path.join(os.path.dirname(filename), req_path) - # TODO: Why not use `comes_from='-r {} (line {})'` here as well? parsed_reqs = parse_requirements( req_path, finder, comes_from, options, session, constraint=nested_constraint, wheel_cache=wheel_cache From c8307614f23ddc160e812ab76fa492c9432a5db6 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 20 Oct 2019 12:50:07 -0400 Subject: [PATCH 4/5] Do requirement file recursion first This change makes factoring out the parsing more obvious. --- src/pip/_internal/req/req_file.py | 51 +++++++++++++++++-------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 2b57bd07f2e..f2a69c280ff 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -180,6 +180,34 @@ def process_line( msg = 'Invalid requirement: %s\n%s' % (line, e.msg) raise RequirementsFileParseError(msg) + # parse a nested requirements file + if ( + not args_str and + not opts.editable and + (opts.requirements or opts.constraints) + ): + if opts.requirements: + req_path = opts.requirements[0] + nested_constraint = False + else: + req_path = opts.constraints[0] + nested_constraint = True + # original file is over http + if SCHEME_RE.search(filename): + # do a url join so relative paths work + req_path = urllib_parse.urljoin(filename, req_path) + # original file and nested file are paths + elif not SCHEME_RE.search(req_path): + # do a join so relative paths work + req_path = os.path.join(os.path.dirname(filename), req_path) + parsed_reqs = parse_requirements( + req_path, finder, comes_from, options, session, + constraint=nested_constraint, wheel_cache=wheel_cache + ) + for req in parsed_reqs: + yield req + return + # preserve for the nested code path line_comes_from = '%s %s (line %s)' % ( '-c' if constraint else '-r', filename, line_number, @@ -216,29 +244,6 @@ def process_line( constraint=constraint, isolated=isolated, wheel_cache=wheel_cache ) - # parse a nested requirements file - elif opts.requirements or opts.constraints: - if opts.requirements: - req_path = opts.requirements[0] - nested_constraint = False - else: - req_path = opts.constraints[0] - nested_constraint = True - # original file is over http - if SCHEME_RE.search(filename): - # do a url join so relative paths work - req_path = urllib_parse.urljoin(filename, req_path) - # original file and nested file are paths - elif not SCHEME_RE.search(req_path): - # do a join so relative paths work - req_path = os.path.join(os.path.dirname(filename), req_path) - parsed_reqs = parse_requirements( - req_path, finder, comes_from, options, session, - constraint=nested_constraint, wheel_cache=wheel_cache - ) - for req in parsed_reqs: - yield req - # percolate hash-checking option upward elif opts.require_hashes: options.require_hashes = opts.require_hashes From a5d53eab0a2374de8730d51109efad88ee2b3d5a Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 20 Oct 2019 13:16:42 -0400 Subject: [PATCH 5/5] Simplify skip_requirements_regex option handling Decouples `process_lines` from our CLI options. --- src/pip/_internal/req/req_file.py | 26 ++++++++++++++------------ tests/unit/test_req_file.py | 25 ++++++++++--------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index f2a69c280ff..bbc12555a96 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -117,7 +117,10 @@ def parse_requirements( filename, comes_from=comes_from, session=session ) - lines_enum = preprocess(content, options) + skip_requirements_regex = ( + options.skip_requirements_regex if options else None + ) + lines_enum = preprocess(content, skip_requirements_regex) for line_number, line in lines_enum: req_iter = process_line(line, filename, line_number, finder, @@ -127,8 +130,8 @@ def parse_requirements( yield req -def preprocess(content, options): - # type: (Text, Optional[optparse.Values]) -> ReqFileLines +def preprocess(content, skip_requirements_regex): + # type: (Text, Optional[str]) -> ReqFileLines """Split, filter, and join lines, and return a line iterator :param content: the content of the requirements file @@ -137,7 +140,8 @@ def preprocess(content, options): lines_enum = enumerate(content.splitlines(), start=1) # type: ReqFileLines lines_enum = join_lines(lines_enum) lines_enum = ignore_comments(lines_enum) - lines_enum = skip_regex(lines_enum, options) + if skip_requirements_regex: + lines_enum = skip_regex(lines_enum, skip_requirements_regex) lines_enum = expand_env_variables(lines_enum) return lines_enum @@ -183,7 +187,7 @@ def process_line( # parse a nested requirements file if ( not args_str and - not opts.editable and + not opts.editables and (opts.requirements or opts.constraints) ): if opts.requirements: @@ -397,17 +401,15 @@ def ignore_comments(lines_enum): yield line_number, line -def skip_regex(lines_enum, options): - # type: (ReqFileLines, Optional[optparse.Values]) -> ReqFileLines +def skip_regex(lines_enum, pattern): + # type: (ReqFileLines, str) -> ReqFileLines """ - Skip lines that match '--skip-requirements-regex' pattern + Skip lines that match the provided pattern Note: the regex pattern is only built once """ - skip_regex = options.skip_requirements_regex if options else None - if skip_regex: - pattern = re.compile(skip_regex) - lines_enum = filterfalse(lambda e: pattern.search(e[1]), lines_enum) + matcher = re.compile(pattern) + lines_enum = filterfalse(lambda e: matcher.search(e[1]), lines_enum) return lines_enum diff --git a/tests/unit/test_req_file.py b/tests/unit/test_req_file.py index ca86a191b29..29145680f62 100644 --- a/tests/unit/test_req_file.py +++ b/tests/unit/test_req_file.py @@ -83,8 +83,8 @@ def test_skip_regex_after_joining_case1(self, options): ern line2 """) - options.skip_requirements_regex = 'pattern' - result = preprocess(content, options) + skip_requirements_regex = 'pattern' + result = preprocess(content, skip_requirements_regex) assert list(result) == [(3, 'line2')] def test_skip_regex_after_joining_case2(self, options): @@ -93,8 +93,8 @@ def test_skip_regex_after_joining_case2(self, options): line2 line3 """) - options.skip_requirements_regex = 'pattern' - result = preprocess(content, options) + skip_requirements_regex = 'pattern' + result = preprocess(content, skip_requirements_regex) assert list(result) == [(3, 'line3')] @@ -154,24 +154,19 @@ class TestSkipRegex(object): """tests for `skip_reqex``""" def test_skip_regex_pattern_match(self): - options = stub(skip_requirements_regex='.*Bad.*') + pattern = '.*Bad.*' line = '--extra-index-url Bad' - assert [] == list(skip_regex(enumerate([line]), options)) + assert [] == list(skip_regex(enumerate([line]), pattern)) def test_skip_regex_pattern_not_match(self): - options = stub(skip_requirements_regex='.*Bad.*') + pattern = '.*Bad.*' line = '--extra-index-url Good' - assert [(0, line)] == list(skip_regex(enumerate([line]), options)) + assert [(0, line)] == list(skip_regex(enumerate([line]), pattern)) def test_skip_regex_no_options(self): - options = None + pattern = None line = '--extra-index-url Good' - assert [(0, line)] == list(skip_regex(enumerate([line]), options)) - - def test_skip_regex_no_skip_option(self): - options = stub(skip_requirements_regex=None) - line = '--extra-index-url Good' - assert [(0, line)] == list(skip_regex(enumerate([line]), options)) + assert [(1, line)] == list(preprocess(line, pattern)) class TestProcessLine(object):