Skip to content

Commit

Permalink
Simplify the escaping of backslashes in quoted strings
Browse files Browse the repository at this point in the history
Fixes omry#615
  • Loading branch information
odelalleau committed Mar 17, 2021
1 parent f24fe6a commit 7424df5
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 85 deletions.
12 changes: 9 additions & 3 deletions omegaconf/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,10 +610,13 @@ def _maybe_resolve_interpolation(
key: Any,
value: "Node",
throw_on_resolution_failure: bool,
force_resolution: bool = False,
) -> Any:
value_kind = get_value_kind(value)
if value_kind != ValueKind.INTERPOLATION:
return value
if not force_resolution:
# Skip resolution if not an interpolation.
value_kind = get_value_kind(value)
if value_kind != ValueKind.INTERPOLATION:
return value

parse_tree = parse(_get_value(value))
return self._resolve_interpolation_from_parse_tree(
Expand Down Expand Up @@ -662,6 +665,9 @@ def quoted_string_callback(quoted_str: str) -> str:
is_optional=True,
),
throw_on_resolution_failure=True,
# We must always process the string as if it contained an
# interpolation, for proper un-escaping of backslashes (\\).
force_resolution=True,
)
return str(quoted_val)

Expand Down
2 changes: 1 addition & 1 deletion omegaconf/grammar/OmegaConfGrammarParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ options {tokenVocab = OmegaConfGrammarLexer;}

// Main rules used to parse OmegaConf strings.

configValue: (toplevelStr | (toplevelStr? (interpolation toplevelStr?)+)) EOF;
configValue: (toplevelStr | (toplevelStr? (interpolation toplevelStr?)+))? EOF;
singleElement: element EOF;

// Top-level string (that does not need to be parsed).
Expand Down
16 changes: 5 additions & 11 deletions omegaconf/grammar_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,9 @@ def visitConfigKey(self, ctx: OmegaConfGrammarParser.ConfigKeyContext) -> str:
return child.symbol.text

def visitConfigValue(self, ctx: OmegaConfGrammarParser.ConfigValueContext) -> Any:
# (toplevelStr | (toplevelStr? (interpolation toplevelStr?)+)) EOF
# (toplevelStr | (toplevelStr? (interpolation toplevelStr?)+))? EOF
# Visit all children (except last one which is EOF)
vals = [self.visit(c) for c in list(ctx.getChildren())[:-1]]
assert vals
if len(vals) == 1 and isinstance(
ctx.getChild(0), OmegaConfGrammarParser.InterpolationContext
):
Expand Down Expand Up @@ -335,16 +334,11 @@ def _resolve_quoted_string(self, quoted: str) -> str:
quote_type = quoted[0]
assert quote_type in ["'", '"']

# Un-escape quotes and backslashes within the string (the two kinds of
# escapable characters in quoted strings). We do it in two passes:
# 1. Replace `\"` with `"` (and similarly for single quotes)
# 2. Replace `\\` with `\`
# The order is important so that `\\"` is replaced with an escaped quote `\"`.
# We also remove the start and end quotes.
# Remove start and end quotes and un-escape quotes within the string.
# Note that, importantly, we do *not* un-escape backslashes (\\) here:
# instead, we rely on `quoted_string_callback()` to take care of these.
esc_quote = f"\\{quote_type}"
quoted_content = (
quoted[1:-1].replace(esc_quote, quote_type).replace("\\\\", "\\")
)
quoted_content = quoted[1:-1].replace(esc_quote, quote_type)

# Parse the string.
return self.quoted_string_callback(quoted_content)
Expand Down
140 changes: 70 additions & 70 deletions tests/test_grammar.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
)


# Parameters for tests of the "singleElement" rule when there is no interpolation.
# Parameters for tests of the "singleElement" rule when there is no need for callbacks.
# Each item is a tuple with three elements:
# - The id of the test.
# - The expression to be evaluated.
Expand All @@ -62,7 +62,7 @@
# visiting (= evaluating) the parse tree. If the expected behavior is for the parsing
# to succeed, but a `GrammarParseError` to be raised when visiting it, then set the
# expected result to the pair `(None, GrammarParseError)`.
PARAMS_SINGLE_ELEMENT_NO_INTERPOLATION: List[Tuple[str, str, Any]] = [
PARAMS_SINGLE_ELEMENT_NO_CALLBACK: List[Tuple[str, str, Any]] = [
# Special keywords.
("null", "null", None),
("true", "TrUe", True),
Expand Down Expand Up @@ -121,22 +121,6 @@
("str_backslash_noesc", r"ab\cd", r"ab\cd"),
("str_esc_illegal_1", r"\#", GrammarParseError),
("str_esc_illegal_2", "\\'\\\"", GrammarParseError),
# Quoted strings.
("str_quoted_single", "'!@#$%^&*()[]:.,\"'", '!@#$%^&*()[]:.,"'),
("str_quoted_double", '"!@#$%^&*()[]:.,\'"', "!@#$%^&*()[]:.,'"),
("str_quoted_outer_ws_single", "' a \t'", " a \t"),
("str_quoted_outer_ws_double", '" a \t"', " a \t"),
("str_quoted_int", "'123'", "123"),
("str_quoted_null", "'null'", "null"),
("str_quoted_bool", "['truE', \"FalSe\"]", ["truE", "FalSe"]),
("str_quoted_list", "'[a,b, c]'", "[a,b, c]"),
("str_quoted_dict", '"{a:b, c: d}"', "{a:b, c: d}"),
("str_quoted_backslash_noesc_single", r"'a\b'", r"a\b"),
("str_quoted_backslash_noesc_double", r'"a\b"', r"a\b"),
("str_quoted_concat_bad_2", "'Hi''there'", GrammarParseError),
("str_quoted_too_many_1", "''a'", GrammarParseError),
("str_quoted_too_many_2", "'a''", GrammarParseError),
("str_quoted_too_many_3", "''a''", GrammarParseError),
# Lists and dictionaries.
("list", "[0, 1]", [0, 1]),
(
Expand All @@ -149,37 +133,10 @@
"{a0-null-1-3.14-NaN- \t-true-False-/\\+.$%*@\\(\\)\\[\\]\\{\\}\\:\\=\\ \\\t\\,:0}",
{"a0-null-1-3.14-NaN- \t-true-False-/\\+.$%*@()[]{}:= \t,": 0},
),
(
"dict_quoted",
"{0: 1, 'a': 'b', 1.1: 1e2, null: 0.1, true: false, -inf: true}",
{0: 1, "a": "b", 1.1: 100.0, None: 0.1, True: False, -math.inf: True},
),
(
"structured_mixed",
"[10,str,3.14,true,false,inf,[1,2,3], 'quoted', \"quoted\", 'a,b,c']",
[
10,
"str",
3.14,
True,
False,
math.inf,
[1, 2, 3],
"quoted",
"quoted",
"a,b,c",
],
),
("dict_int_key", "{0: 0}", {0: 0}),
("dict_float_key", "{1.1: 0}", {1.1: 0}),
("dict_null_key", "{null: 0}", {None: 0}),
("dict_nan_like_key", "{'nan': 0}", {"nan": 0}),
("dict_list_as_key", "{[0]: 1}", GrammarParseError),
(
"dict_bool_key",
"{true: true, false: 'false'}",
{True: True, False: "false"},
),
("empty_dict", "{}", {}),
("empty_list", "[]", []),
(
Expand All @@ -189,8 +146,8 @@
),
]

# Parameters for tests of the "singleElement" rule when there are interpolations.
PARAMS_SINGLE_ELEMENT_WITH_INTERPOLATION = [
# Parameters for tests of the "singleElement" rule when callbacks are required.
PARAMS_SINGLE_ELEMENT_WITH_CALLBACK = [
# Node interpolations.
("dict_access", "${dict.a}", 0),
("list_access", "${list.0}", -1),
Expand All @@ -206,21 +163,67 @@
("dotpath_bad_type", "${dict.${float}}", (None, InterpolationResolutionError)),
("at_in_key", "${x@y}", 123),
("dollar_in_key", "${$x$y$z$}", 456),
# Interpolations in dictionaries.
# Dictionaries.
("dict_interpolation_value", "{hi: ${str}, int: ${int}}", {"hi": "hi", "int": 123}),
("dict_interpolation_key", "{${str}: 0, ${null}: 1", GrammarParseError),
# Interpolations in lists.
("dict_bool_key", "{true: true, false: 'false'}", {True: True, False: "false"}),
(
"dict_quoted",
"{0: 1, 'a': 'b', 1.1: 1e2, null: 0.1, true: false, -inf: true}",
{0: 1, "a": "b", 1.1: 100.0, None: 0.1, True: False, -math.inf: True},
),
# NaN as dictionary key (a resolver is used here to output only the key).
("dict_nan_key_1", "${first:{nan: 0}}", math.nan),
("dict_nan_key_2", "${first:{${test:nan}: 0}}", GrammarParseError),
("dict_nan_like_key", "{'nan': 0}", {"nan": 0}),
# Lists.
("list_interpolation", "[${str}, ${int}]", ["hi", 123]),
# Interpolations in unquoted strings.
(
"structured_mixed",
"[10,str,3.14,true,false,inf,[1,2,3], 'quoted', \"quoted\", 'a,b,c']",
[
10,
"str",
3.14,
True,
False,
math.inf,
[1, 2, 3],
"quoted",
"quoted",
"a,b,c",
],
),
# Unquoted strings.
("str_dollar_and_inter", "$$${str}", "$$hi"),
("str_inter", "hi_${str}", "hi_hi"),
("str_esc_illegal_3", r"\${foo\}", GrammarParseError),
# Interpolations in quoted strings.
# Quoted strings without interpolations.
("str_quoted_single", "'!@#$%^&*()[]:.,\"'", '!@#$%^&*()[]:.,"'),
("str_quoted_double", '"!@#$%^&*()[]:.,\'"', "!@#$%^&*()[]:.,'"),
("str_quoted_outer_ws_single", "' a \t'", " a \t"),
("str_quoted_outer_ws_double", '" a \t"', " a \t"),
("str_quoted_int", "'123'", "123"),
("str_quoted_null", "'null'", "null"),
("str_quoted_bool", "['truE', \"FalSe\"]", ["truE", "FalSe"]),
("str_quoted_list", "'[a,b, c]'", "[a,b, c]"),
("str_quoted_dict", '"{a:b, c: d}"', "{a:b, c: d}"),
("str_quoted_backslash_noesc_single", r"'a\b'", r"a\b"),
("str_quoted_backslash_noesc_double", r'"a\b"', r"a\b"),
("str_quoted_esc_backslash_1", r"'a\\b'", r"a\b"),
("str_quoted_esc_backslash_2", r"'a\\\b'", r"a\\b"),
("str_quoted_esc_backslash_3", r"'a\\\\b'", r"a\\b"),
("str_quoted_concat_bad_2", "'Hi''there'", GrammarParseError),
("str_quoted_too_many_1", "''a'", GrammarParseError),
("str_quoted_too_many_2", "'a''", GrammarParseError),
("str_quoted_too_many_3", "''a''", GrammarParseError),
("str_quoted_empty", "''", ""),
# Quoted strings with interpolations.
("str_quoted_inter", "'${null}'", "None"),
("str_quoted_esc_single_1", r"'ab\'cd\'\'${str}'", "ab'cd''hi"),
("str_quoted_esc_single_2", "'\"\\\\\\\\\\${foo}\\ '", r'"\${foo}\ '),
("str_quoted_esc_single_2", "'\"\\\\\\${foo}\\ '", r'"\${foo}\ '),
("str_quoted_esc_double_1", r'"ab\"cd\"\"${str}"', 'ab"cd""hi'),
("str_quoted_esc_double_2", '"\'\\\\\\\\\\${foo}\\ "', r"'\${foo}\ "),
("str_quoted_esc_double_2", '"\'\\\\\\${foo}\\ "', r"'\${foo}\ "),
("str_quoted_concat_bad_1", '"Hi "${str}', GrammarParseError),
# Whitespaces.
("ws_inter_node_outer", "${ \tdict.a \t}", 0),
Expand Down Expand Up @@ -267,9 +270,6 @@
("float_resolver_noquote", "${1.1:1,2,3}", GrammarParseError),
("float_resolver_exp", "${1e1:1,2,3}", GrammarParseError),
("inter_float_resolver", "${${float}:1,2,3}", (None, InterpolationResolutionError)),
# NaN as dictionary key (a resolver is used here to output only the key).
("dict_nan_key_1", "${first:{nan: 0}}", math.nan),
("dict_nan_key_2", "${first:{${test:nan}: 0}}", GrammarParseError),
]

# Parameters for tests of the "configValue" rule (may contain node
Expand Down Expand Up @@ -309,6 +309,7 @@
("str_top_middle_escapes", r"abc\\\\\${str}", r"abc\\${str}"),
("str_top_trailing_escapes", "${str}" + "\\" * 5, "hi" + "\\" * 3),
("str_top_concat_interpolations", "${null}${float}", "None1.2"),
("str_top_empty", "", ""),
# Whitespaces.
("ws_toplevel", " \tab ${str} cd ${int}\t", " \tab hi cd 123\t"),
# Unmatched braces.
Expand All @@ -333,34 +334,33 @@ class TestOmegaConfGrammar:
Test most grammar constructs.
Each method in this class tests the validity of expressions in a specific
setting. For instance, `test_single_element_no_interpolation()` tests the
"singleElement" parsing rule on expressions that do not contain interpolations
setting. For instance, `test_single_element_no_callback()` tests the
"singleElement" parsing rule on expressions that do not need callbacks
(which allows for faster tests without using any config object).
Tests that actually need a config object all re-use the same `BASE_TEST_CFG`
config, to avoid creating a new config for each test.
"""

@parametrize_from(PARAMS_SINGLE_ELEMENT_NO_INTERPOLATION)
def test_single_element_no_interpolation(
self, definition: str, expected: Any
) -> None:
@parametrize_from(PARAMS_SINGLE_ELEMENT_NO_CALLBACK)
def test_single_element_no_callback(self, definition: str, expected: Any) -> None:
parse_tree, expected_visit = self._parse("singleElement", definition, expected)
if parse_tree is None:
return

# Since there are no interpolations here, we do not need to provide
# callbacks to resolve them, and the quoted string callback can simply
# be the identity.
def fake_callback(*args: Any, **kw: Any) -> None:
assert False # must not be called

visitor = grammar_visitor.GrammarVisitor(
node_interpolation_callback=None, # type: ignore
resolver_interpolation_callback=None, # type: ignore
quoted_string_callback=lambda s: s,
node_interpolation_callback=fake_callback,
resolver_interpolation_callback=fake_callback,
quoted_string_callback=fake_callback, # type: ignore
)

self._visit(lambda: visitor.visit(parse_tree), expected_visit)

@parametrize_from(PARAMS_SINGLE_ELEMENT_WITH_INTERPOLATION)
def test_single_element_with_resolver(
@parametrize_from(PARAMS_SINGLE_ELEMENT_WITH_CALLBACK)
def test_single_element_with_callback(
self, restore_resolvers: Any, definition: str, expected: Any
) -> None:
parse_tree, expected_visit = self._parse("singleElement", definition, expected)
Expand Down

0 comments on commit 7424df5

Please sign in to comment.