Skip to content

Commit

Permalink
Parse newline, UTF-8, trailing comment, backslash
Browse files Browse the repository at this point in the history
This adds support for:

* multiline values (i.e. containing newlines or escaped \n), fixes #89
* backslashes in values, fixes #112
* trailing comments, fixes #141
* UTF-8 in unquoted values, fixes #147

Parsing is no longer line-based.  That's why `parse_line` was replaced
by `parse_binding`.  Thanks to the previous commit, users of
`parse_stream` don't have to deal with this change.

This supersedes a previous pull-request, #142, which would add support for
multiline values in `Dotenv.parse` but not in the CLI (`dotenv get` and `dotenv
set`).

The key-value binding regular expression was inspired by
https://github.com/bkeepers/dotenv/blob/d749366b6009126b115fb7b63e0509566365859a/lib/dotenv/parser.rb#L14-L30

Parsing of escapes was fixed thanks to
https://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python/24519338#24519338
  • Loading branch information
bbc2 committed Oct 31, 2018
1 parent 339ffe4 commit 4f29088
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 42 deletions.
80 changes: 57 additions & 23 deletions dotenv/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,44 +15,78 @@

from .compat import StringIO, PY2, WIN, text_type

__escape_decoder = codecs.getdecoder('unicode_escape')
__posix_variable = re.compile(r'\$\{[^\}]*\}')

Binding = namedtuple('Binding', 'key value original')
# Pattern for a single binding: optional leading whitespace, an optional
# `export` prefix, a key, an optional `= value`, an optional trailing `#`
# comment and the line ending.  Every part is optional, so comment-only
# text or text without an `=` still matches and is captured as a whole.
# Capture groups: 1 = the entire matched text, 2 = the key (a single-quoted
# key keeps its quotes), 3 = the value (a quoted value keeps its quotes).
# Quoted values may span several lines because `[^']` and `[^"]` also match
# newline characters.
_binding = re.compile(
r"""
(
\s* # leading whitespace
(?:export\s+)? # export
( '[^']+' # single-quoted key
| [^=\#\s]+ # or unquoted key
)?
(?:
(?:\s*=\s*) # equal sign
( '(?:\\'|[^'])*' # single-quoted value
| "(?:\\"|[^"])*" # or double-quoted value
| [^\#\r\n]* # or unquoted value
)
)?
\s* # trailing whitespace
(?:\#[^\r\n]*)? # comment
\s* # trailing whitespace
(?:\r|\n|\r\n)? # newline
)
""",
re.MULTILINE | re.VERBOSE,
)

def decode_escaped(escaped):
return __escape_decoder(escaped)[0]
# Matches a backslash followed by one of: \ ' " a b f n r t v.
# Any other backslash sequence is not matched by this pattern.
_escape_sequence = re.compile(r"\\[\\'\"abfnrtv]")


def parse_line(line):
line = line.strip()
# Result of parsing one binding: `key` and `value` hold the parsed pair
# (both are None when the text is not a key/value binding) and `original`
# is the exact source text that was consumed.
Binding = namedtuple('Binding', 'key value original')


# Ignore blank lines, comment lines (starting with `#`), and lines without an `=`.
if not line or line.startswith('#') or '=' not in line:
return None, None
def decode_escapes(string):
def decode_match(match):
return codecs.decode(match.group(0), 'unicode-escape')

k, v = line.split('=', 1)
return _escape_sequence.sub(decode_match, string)

if k.startswith('export '):
(_, _, k) = k.partition('export ')

# Remove any leading and trailing spaces in key, value
k, v = k.strip(), v.strip()
def is_surrounded_by(string, char):
    """Return True when `string` both starts and ends with `char`.

    Strings shorter than two characters never qualify, so a lone quote
    character is not mistaken for an empty quoted value.
    """
    if len(string) < 2:
        return False
    first, last = string[0], string[-1]
    return first == last and last == char

if v:
v = v.encode('unicode-escape').decode('ascii')
quoted = v[0] == v[-1] in ['"', "'"]
if quoted:
v = decode_escaped(v[1:-1])

return k, v
def parse_binding(string, position):
    """Parse one binding from `string`, starting at offset `position`.

    Returns a tuple `(binding, end)` where `binding` is a `Binding` and
    `end` is the offset just past the text consumed by this binding.

    When the matched text has no key or no value (for example a comment
    or a bare word without `=`), the binding's key and value are both
    None; `original` still carries the text that was matched.
    """
    match = _binding.match(string, position)
    (matched, key, value) = match.groups()
    end = match.end()

    if key is None or value is None:
        # Not a complete key/value pair: report the raw text only.
        return (Binding(key=None, value=None, original=matched), end)

    if is_surrounded_by(value, "'") or is_surrounded_by(value, '"'):
        # Quoted value: drop the surrounding quotes and expand escapes.
        value = decode_escapes(value[1:-1])
    else:
        # Unquoted value: surrounding whitespace is not significant.
        value = value.strip()

    return (Binding(key=key, value=value, original=matched), end)


def parse_stream(stream):
for line in stream:
(key, value) = parse_line(line)
yield Binding(key=key, value=value, original=line)
string = stream.read()
position = 0
length = len(string)
while position < length:
(binding, position) = parse_binding(string, position)
yield binding


class DotEnv():
Expand Down
31 changes: 31 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ def test_set_key(dotenv_file):
with open(dotenv_file, 'r') as fp:
assert 'HELLO="WORLD 2"\nfoo="bar"' == fp.read().strip()

success, key_to_set, value_to_set = dotenv.set_key(dotenv_file, "HELLO", "WORLD\n3")

with open(dotenv_file, "r") as fp:
assert 'HELLO="WORLD\n3"\nfoo="bar"' == fp.read().strip()


def test_set_key_permission_error(dotenv_file):
os.chmod(dotenv_file, 0o000)
Expand Down Expand Up @@ -71,6 +76,13 @@ def test_list_wo_file(cli):
assert 'Invalid value for "-f"' in result.output


def test_empty_value():
    """A key followed by `=` and nothing else reads back as an empty string."""
    with open(dotenv_path, "w") as f:
        f.write("TEST=")
    try:
        assert dotenv.get_key(dotenv_path, "TEST") == ""
    finally:
        # Remove the file even when the assertion fails so it is not left behind.
        sh.rm(dotenv_path)


def test_key_value_without_quotes():
with open(dotenv_path, 'w') as f:
f.write("TEST = value \n")
Expand Down Expand Up @@ -107,6 +119,25 @@ def test_value_with_special_characters():
sh.rm(dotenv_path)


def test_value_with_new_lines():
    """Quoted values keep an embedded newline, for both quote styles."""
    for content in ('TEST="a\nb"', "TEST='a\nb'"):
        with open(dotenv_path, 'w') as f:
            f.write(content)
        try:
            assert dotenv.get_key(dotenv_path, 'TEST') == "a\nb"
        finally:
            # Remove the file even when the assertion fails so it is not left behind.
            sh.rm(dotenv_path)


def test_value_after_comment():
    """A binding on the line after a comment line is still found."""
    with open(dotenv_path, "w") as f:
        f.write("# comment\nTEST=a")
    try:
        assert dotenv.get_key(dotenv_path, "TEST") == "a"
    finally:
        # Remove the file even when the assertion fails so it is not left behind.
        sh.rm(dotenv_path)


def test_unset_ok(dotenv_file):
with open(dotenv_file, "w") as f:
f.write("a=b\nc=d")
Expand Down
71 changes: 52 additions & 19 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import sh

from dotenv import load_dotenv, find_dotenv, set_key, dotenv_values
from dotenv.main import Binding, parse_line, parse_stream
from dotenv.main import Binding, parse_stream
from dotenv.compat import StringIO
from IPython.terminal.embed import InteractiveShellEmbed

Expand All @@ -24,34 +24,67 @@ def restore_os_environ():
os.environ.update(environ)


@pytest.mark.parametrize("test_input,expected", [
("a=b", ("a", "b")),
(" a = b ", ("a", "b")),
("export a=b", ("a", "b")),
(" export 'a'=b", ("'a'", "b")),
(" export 'a'=b", ("'a'", "b")),
("# a=b", (None, None)),
("# a=b", (None, None)),
("a=b space ", ('a', 'b space')),
("a='b space '", ('a', 'b space ')),
('a="b space "', ('a', 'b space ')),
("export export_spam=1", ("export_spam", "1")),
("export port=8000", ("port", "8000")),
])
def test_parse_line(test_input, expected):
assert parse_line(test_input) == expected


@pytest.mark.parametrize("test_input,expected", [
("", []),
("a=b", [Binding(key="a", value="b", original="a=b")]),
("'a'=b", [Binding(key="'a'", value="b", original="'a'=b")]),
("[=b", [Binding(key="[", value="b", original="[=b")]),
(" a = b ", [Binding(key="a", value="b", original=" a = b ")]),
("export a=b", [Binding(key="a", value="b", original="export a=b")]),
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
(" export 'a'=b", [Binding(key="'a'", value="b", original=" export 'a'=b")]),
("# a=b", [Binding(key=None, value=None, original="# a=b")]),
('a=b # comment', [Binding(key="a", value="b", original="a=b # comment")]),
("a=b space ", [Binding(key="a", value="b space", original="a=b space ")]),
("a='b space '", [Binding(key="a", value="b space ", original="a='b space '")]),
('a="b space "', [Binding(key="a", value="b space ", original='a="b space "')]),
("export export_a=1", [Binding(key="export_a", value="1", original="export export_a=1")]),
("export port=8000", [Binding(key="port", value="8000", original="export port=8000")]),
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
("a='b\nc'", [Binding(key="a", value="b\nc", original="a='b\nc'")]),
('a="b\nc"', [Binding(key="a", value="b\nc", original='a="b\nc"')]),
('a="b\\nc"', [Binding(key="a", value='b\nc', original='a="b\\nc"')]),
('a="b\\"c"', [Binding(key="a", value='b"c', original='a="b\\"c"')]),
("a='b\\'c'", [Binding(key="a", value="b'c", original="a='b\\'c'")]),
("a=à", [Binding(key="a", value="à", original="a=à")]),
('a="à"', [Binding(key="a", value="à", original='a="à"')]),
('garbage', [Binding(key=None, value=None, original="garbage")]),
(
"a=b\nc=d",
[
Binding(key="a", value="b", original="a=b\n"),
Binding(key="c", value="d", original="c=d"),
],
),
(
"a=b\r\nc=d",
[
Binding(key="a", value="b", original="a=b\r\n"),
Binding(key="c", value="d", original="c=d"),
],
),
(
'a="\nb=c',
[
Binding(key="a", value='"', original='a="\n'),
Binding(key="b", value='c', original="b=c"),
]
),
(
'# comment\na="b\nc"\nd=e\n',
[
Binding(key=None, value=None, original="# comment\n"),
Binding(key="a", value="b\nc", original='a="b\nc"\n'),
Binding(key="d", value="e", original="d=e\n"),
],
),
(
'garbage[%$#\na=b',
[
Binding(key=None, value=None, original="garbage[%$#\n"),
Binding(key="a", value="b", original='a=b'),
],
),
])
def test_parse_stream(test_input, expected):
result = parse_stream(StringIO(test_input))
Expand Down

0 comments on commit 4f29088

Please sign in to comment.