From 4f290883cfa168a52829d29da9955e47e8c875ef Mon Sep 17 00:00:00 2001 From: Bertrand Bonnefoy-Claudet Date: Sat, 27 Oct 2018 12:06:34 +0200 Subject: [PATCH] Parse newline, UTF-8, trailing comment, backslash This adds support for: * multiline values (i.e. containing newlines or escaped \n), fixes #89 * backslashes in values, fixes #112 * trailing comments, fixes #141 * UTF-8 in unquoted values, fixes #147 Parsing is no longer line-based. That's why `parse_line` was replaced by `parse_binding`. Thanks to the previous commit, users of `parse_stream` don't have to deal with this change. This supersedes a previous pull-request, #142, which would add support for multiline values in `Dotenv.parse` but not in the CLI (`dotenv get` and `dotenv set`). The key-value binding regular expression was inspired by https://github.com/bkeepers/dotenv/blob/d749366b6009126b115fb7b63e0509566365859a/lib/dotenv/parser.rb#L14-L30 Parsing of escapes was fixed thanks to https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python/24519338#24519338 --- dotenv/main.py | 80 +++++++++++++++++++++++++++++++++------------- tests/test_cli.py | 31 ++++++++++++++++++ tests/test_core.py | 71 +++++++++++++++++++++++++++++----------- 3 files changed, 140 insertions(+), 42 deletions(-) diff --git a/dotenv/main.py b/dotenv/main.py index 88565802..80aee939 100644 --- a/dotenv/main.py +++ b/dotenv/main.py @@ -15,44 +15,78 @@ from .compat import StringIO, PY2, WIN, text_type -__escape_decoder = codecs.getdecoder('unicode_escape') __posix_variable = re.compile(r'\$\{[^\}]*\}') -Binding = namedtuple('Binding', 'key value original') +_binding = re.compile( + r""" + ( + \s* # leading whitespace + (?:export\s+)? # export + + ( '[^']+' # single-quoted key + | [^=\#\s]+ # or unquoted key + )? + + (?: + (?:\s*=\s*) # equal sign + + ( '(?:\\'|[^'])*' # single-quoted value + | "(?:\\"|[^"])*" # or double-quoted value + | [^\#\r\n]* # or unquoted value + ) + )? + \s* # trailing whitespace + (?:\#[^\r\n]*)? # comment + \s* # trailing whitespace + (?:\r|\n|\r\n)? # newline + ) + """, + re.MULTILINE | re.VERBOSE, +) -def decode_escaped(escaped): - return __escape_decoder(escaped)[0] +_escape_sequence = re.compile(r"\\[\\'\"abfnrtv]") -def parse_line(line): - line = line.strip() +Binding = namedtuple('Binding', 'key value original') + - # Ignore lines with `#` or which doesn't have `=` in it. - if not line or line.startswith('#') or '=' not in line: - return None, None +def decode_escapes(string): + def decode_match(match): + return codecs.decode(match.group(0), 'unicode-escape') - k, v = line.split('=', 1) + return _escape_sequence.sub(decode_match, string) - if k.startswith('export '): - (_, _, k) = k.partition('export ') - # Remove any leading and trailing spaces in key, value - k, v = k.strip(), v.strip() +def is_surrounded_by(string, char): + return ( + len(string) > 1 + and string[0] == string[-1] == char + ) - if v: - v = v.encode('unicode-escape').decode('ascii') - quoted = v[0] == v[-1] in ['"', "'"] - if quoted: - v = decode_escaped(v[1:-1]) - return k, v +def parse_binding(string, position): + match = _binding.match(string, position) + (matched, key, value) = match.groups() + if key is None or value is None: + key = None + value = None + else: + value_quoted = is_surrounded_by(value, "'") or is_surrounded_by(value, '"') + if value_quoted: + value = decode_escapes(value[1:-1]) + else: + value = value.strip() + return (Binding(key=key, value=value, original=matched), match.end()) def parse_stream(stream): - for line in stream: - (key, value) = parse_line(line) - yield Binding(key=key, value=value, original=line) + string = stream.read() + position = 0 + length = len(string) + while position < length: + (binding, position) = parse_binding(string, position) + yield binding class DotEnv(): diff --git a/tests/test_cli.py b/tests/test_cli.py index 65359cef..b594592a 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -39,6 +39,11 @@ def test_set_key(dotenv_file): with open(dotenv_file, 'r') as fp: assert 'HELLO="WORLD 2"\nfoo="bar"' == fp.read().strip() + success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, "HELLO", "WORLD\n3") + + with open(dotenv_file, "r") as fp: + assert 'HELLO="WORLD\n3"\nfoo="bar"' == fp.read().strip() + def test_set_key_permission_error(dotenv_file): os.chmod(dotenv_file, 0o000) @@ -71,6 +76,13 @@ def test_list_wo_file(cli): assert 'Invalid value for "-f"' in result.output +def test_empty_value(): + with open(dotenv_path, "w") as f: + f.write("TEST=") + assert dotenv.get_key(dotenv_path, "TEST") == "" + sh.rm(dotenv_path) + + def test_key_value_without_quotes(): with open(dotenv_path, 'w') as f: f.write("TEST = value \n") @@ -107,6 +119,25 @@ def test_value_with_special_characters(): sh.rm(dotenv_path) +def test_value_with_new_lines(): + with open(dotenv_path, 'w') as f: + f.write('TEST="a\nb"') + assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb" + sh.rm(dotenv_path) + + with open(dotenv_path, 'w') as f: + f.write("TEST='a\nb'") + assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb" + sh.rm(dotenv_path) + + +def test_value_after_comment(): + with open(dotenv_path, "w") as f: + f.write("# comment\nTEST=a") + assert dotenv.get_key(dotenv_path, "TEST") == "a" + sh.rm(dotenv_path) + + def test_unset_ok(dotenv_file): with open(dotenv_file, "w") as f: f.write("a=b\nc=d") diff --git a/tests/test_core.py b/tests/test_core.py index b1b03723..bda2e3b7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -9,7 +9,7 @@ import sh from dotenv import load_dotenv, find_dotenv, set_key, dotenv_values -from dotenv.main import Binding, parse_line, parse_stream +from dotenv.main import Binding, parse_stream from dotenv.compat import StringIO from IPython.terminal.embed import InteractiveShellEmbed @@ -24,27 +24,31 @@ def restore_os_environ(): os.environ.update(environ) -@pytest.mark.parametrize("test_input,expected", [ - ("a=b", ("a", "b")), - (" a = b ", ("a", "b")), - ("export a=b", ("a", "b")), - (" export 'a'=b", ("'a'", "b")), - (" export 'a'=b", ("'a'", "b")), - ("# a=b", (None, None)), - ("# a=b", (None, None)), - ("a=b space ", ('a', 'b space')), - ("a='b space '", ('a', 'b space ')), - ('a="b space "', ('a', 'b space ')), - ("export export_spam=1", ("export_spam", "1")), - ("export port=8000", ("port", "8000")), -]) -def test_parse_line(test_input, expected): - assert parse_line(test_input) == expected - - @pytest.mark.parametrize("test_input,expected", [ ("", []), ("a=b", [Binding(key="a", value="b", original="a=b")]), + ("'a'=b", [Binding(key="'a'", value="b", original="'a'=b")]), + ("[=b", [Binding(key="[", value="b", original="[=b")]), + (" a = b ", [Binding(key="a", value="b", original=" a = b ")]), + ("export a=b", [Binding(key="a", value="b", original="export a=b")]), + (" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]), + (" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]), + ("# a=b", [Binding(key=None, value=None, original="# a=b")]), + ('a=b # comment', [Binding(key="a", value="b", original="a=b # comment")]), + ("a=b space ", [Binding(key="a", value="b space", original="a=b space ")]), + ("a='b space '", [Binding(key="a", value="b space ", original="a='b space '")]), + ('a="b space "', [Binding(key="a", value="b space ", original='a="b space "')]), + ("export export_a=1", [Binding(key="export_a", value="1", original="export export_a=1")]), + ("export port=8000", [Binding(key="port", value="8000", original="export port=8000")]), + ('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]), + ("a='b\nc'", [Binding(key="a", value="b\nc", original="a='b\nc'")]), + ('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]), + ('a="b\\nc"', [Binding(key="a", value='b\nc', original='a="b\\nc"')]), + ('a="b\\"c"', [Binding(key="a", value='b"c', original='a="b\\"c"')]), + ("a='b\\'c'", [Binding(key="a", value="b'c", original="a='b\\'c'")]), + ("a=à", [Binding(key="a", value="à", original="a=à")]), + ('a="à"', [Binding(key="a", value="à", original='a="à"')]), + ('garbage', [Binding(key=None, value=None, original="garbage")]), ( "a=b\nc=d", [ @@ -52,6 +56,35 @@ def test_parse_line(test_input, expected): Binding(key="c", value="d", original="c=d"), ], ), + ( + "a=b\r\nc=d", + [ + Binding(key="a", value="b", original="a=b\r\n"), + Binding(key="c", value="d", original="c=d"), + ], + ), + ( + 'a="\nb=c', + [ + Binding(key="a", value='"', original='a="\n'), + Binding(key="b", value='c', original="b=c"), + ] + ), + ( + '# comment\na="b\nc"\nd=e\n', + [ + Binding(key=None, value=None, original="# comment\n"), + Binding(key="a", value="b\nc", original='a="b\nc"\n'), + Binding(key="d", value="e", original="d=e\n"), + ], + ), + ( + 'garbage[%$#\na=b', + [ + Binding(key=None, value=None, original="garbage[%$#\n"), + Binding(key="a", value="b", original='a=b'), + ], + ), ]) def test_parse_stream(test_input, expected): result = parse_stream(StringIO(test_input))