From 33e49ec770e04d87aa0f8d4dee8a777807beb354 Mon Sep 17 00:00:00 2001 From: Vojta Drabek Date: Fri, 26 Apr 2019 12:33:52 +0200 Subject: [PATCH 1/2] Escaping --- toml-test | 1 + toml/encoder.py | 60 ++++++++++++++++++++++++------------------------- 2 files changed, 31 insertions(+), 30 deletions(-) create mode 160000 toml-test diff --git a/toml-test b/toml-test new file mode 160000 index 0000000..f910e15 --- /dev/null +++ b/toml-test @@ -0,0 +1 @@ +Subproject commit f910e151d1b14d94b1e8a4264db0814fb03520d9 diff --git a/toml/encoder.py b/toml/encoder.py index 94860e1..f0ed844 100644 --- a/toml/encoder.py +++ b/toml/encoder.py @@ -70,37 +70,37 @@ def dumps(o, encoder=None): sections = newsections return retval - -def _dump_str(v): +def _dump_str(v, escape_unicode=True): if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str): v = v.decode('utf-8') - v = "%r" % v - if v[0] == 'u': - v = v[1:] - singlequote = v.startswith("'") - if singlequote or v.startswith('"'): - v = v[1:-1] - if singlequote: - v = v.replace("\\'", "'") - v = v.replace('"', '\\"') - v = v.split("\\x") - while len(v) > 1: - i = -1 - if not v[0]: - v = v[1:] - v[0] = v[0].replace("\\\\", "\\") - # No, I don't know why != works and == breaks - joinx = v[0][i] != "\\" - while v[0][:i] and v[0][i] == "\\": - joinx = not joinx - i -= 1 - if joinx: - joiner = "x" - else: - joiner = "u00" - v = [v[0] + joiner + v[1]] + v[2:] - return unicode('"' + v[0] + '"') - + else: + v = unicode(v) + out = '' + quote = '"""' if len(v.splitlines()) > 1 else '"' + for line in v.splitlines(): + for char in line: + c = ord(char) + if (escape_unicode and c > 127) or c <= 0x1f or c == 0x7f: + h = hex(c)[2:] + if len(h) < 2: + h = '0' + h + out += '\\x' + if c > 255 and len(h) < 4: + h = '0' * (4 - len(h)) + h + out += '\\u' + if c > 65536 and len(h) < 8: + h = '0' * (8 - len(h)) + h + out += '\\U' + out += h + else: + if char == '\\' or char == '"': + out += '\\' + out += char + out += '\n' + out = out[:-1] + if quote == '"""': + out = '\n' + out + return unicode('%s%s%s' % (quote, out, quote)) def _dump_float(v): return "{0:.16}".format(v).replace("e+0", "e+").replace("e-0", "e-") @@ -116,7 +116,7 @@ def _dump_time(v): class TomlEncoder(object): - def __init__(self, _dict=dict, preserve=False): + def __init__(self, _dict=dict, preserve=False, escape_unicode=True): self._dict = _dict self.preserve = preserve self.dump_funcs = { From dc0092efd50dbd25974099ffee67780b51c47787 Mon Sep 17 00:00:00 2001 From: Vojta Drabek Date: Sat, 27 Apr 2019 14:09:00 +0200 Subject: [PATCH 2/2] Use multiline and escape options --- toml/encoder.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/toml/encoder.py b/toml/encoder.py index f0ed844..39070d3 100644 --- a/toml/encoder.py +++ b/toml/encoder.py @@ -1,4 +1,5 @@ import datetime +import functools import re import sys from decimal import Decimal @@ -70,13 +71,13 @@ def dumps(o, encoder=None): sections = newsections return retval -def _dump_str(v, escape_unicode=True): +def _dump_str(v, escape_unicode=True, multiline=False): if sys.version_info < (3,) and hasattr(v, 'decode') and isinstance(v, str): v = v.decode('utf-8') else: v = unicode(v) out = '' - quote = '"""' if len(v.splitlines()) > 1 else '"' + quote = '"""' if len(v.splitlines()) > 1 and multiline else '"' for line in v.splitlines(): for char in line: c = ord(char) @@ -93,13 +94,14 @@ def _dump_str(v, escape_unicode=True): out += '\\U' out += h else: - if char == '\\' or char == '"': - out += '\\' + if char == '\\' or (char == '"' and quote != '"""'): + out += '\\' out += char out += '\n' - out = out[:-1] if quote == '"""': out = '\n' + out + else: + out = out[:-1] return unicode('%s%s%s' % (quote, out, quote)) def _dump_float(v): @@ -116,12 +118,13 @@ def _dump_time(v): class TomlEncoder(object): - def __init__(self, _dict=dict, preserve=False, escape_unicode=True): + def __init__(self, _dict=dict, preserve=False, escape_unicode=True, multiline=False): self._dict = _dict self.preserve = preserve + dump_str = functools.partial(_dump_str, escape_unicode=escape_unicode, multiline=multiline) self.dump_funcs = { - str: _dump_str, - unicode: _dump_str, + str: dump_str, + unicode: dump_str, list: self.dump_list, bool: lambda v: unicode(v).lower(), int: lambda v: v,