Skip to content

Commit

Permalink
fix: Replace non-ASCII characters with question marks (#3464)
Browse files Browse the repository at this point in the history
* Iniparser is only set up to parse string.printable characters. This
  doesn't include non-ASCII characters from various languages, which
  causes an exception when they're in a config file. So replace the
  non-ASCII characters with question marks before parsing.
* Fixes #3450

Signed-off-by: Ryan Blakley <[email protected]>
(cherry picked from commit 1fe7320)
  • Loading branch information
ryan-blakley authored and xiangce committed Jul 13, 2022
1 parent 5d40af7 commit 06ab1d4
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
17 changes: 14 additions & 3 deletions insights/parsr/iniparser.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import string

from insights.parsr import (Comma, EOF, EOL, DoubleQuotedString,
HangingString, InSet, LeftBracket, Lift, LineEnd, Literal, Many,
OneLineComment, Opt, PosMarker, RightBracket, skip_none, String,
WithIndent, WS, WSChar)
HangingString, InSet, LeftBracket, Lift, LineEnd, Literal, Many,
OneLineComment, Opt, PosMarker, RightBracket, skip_none, String,
WithIndent, WS, WSChar)
from insights.parsr.query import Directive, Entry, eq, Section
from six import PY2


class Error(Exception):
Expand Down Expand Up @@ -97,5 +98,15 @@ def apply_defaults(cfg, include_defaults):
Doc = Many(Comment | Sect).map(skip_none)
Top = Doc << WS << EOF

if PY2:
# For py2 sub all non ascii chars for question marks,
# since it doesn't support unicode encoding/decoding well.
from re import sub
content = sub(r"[^\x00-\x7F]", "?", content)
else:
# Encode and replace unicode characters,
# then decode again before processing content.
content = content.encode('ascii', 'replace').decode()

res = Entry(children=Top(content), src=ctx)
return apply_defaults(res, return_defaults)
28 changes: 28 additions & 0 deletions insights/parsr/tests/test_iniparser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# coding:UTF-8
from insights.parsr.iniparser import parse_doc
from insights.parsr.query import last
from six import PY2


DATA = """
Expand Down Expand Up @@ -32,6 +35,17 @@
""".strip()

# INI sample whose [workspace] values contain non-ASCII (Chinese) characters.
# test_unicode feeds it to parse_doc and expects those characters to come back
# as question marks while the ASCII-only [global] values are left untouched.
DATA_UNICODE_TEST = """
[global]
secret-name = "vsphere-creds"
secret-namespace = kube-system
insecure-flag = 1
[workspace]
datacenter = 1-测试部
folder = "/1-测试部/xxxxxxxxx"
""".strip()


def test_iniparser():
res = parse_doc(DATA, None)
Expand All @@ -58,3 +72,17 @@ def test_multiple_values():
def test_no_value():
    """The first "the_force" entry under "novalue" must have a value of None."""
    doc = parse_doc(DATA, None)
    entry = doc["novalue"]["the_force"][0]
    assert entry.value is None


def test_unicode():
    """Check that non-ASCII characters in values are replaced by "?".

    The ASCII-only values in the "global" section must be returned as
    written; the "workspace" values, which contain Chinese characters in
    the fixture, must have those characters replaced by question marks.
    """
    doc = parse_doc(DATA_UNICODE_TEST, None)

    # (section, key, expected value of the last matching entry)
    expected = [
        ("global", "insecure-flag", "1"),
        ("global", "secret-name", '"vsphere-creds"'),
        ("global", "secret-namespace", "kube-system"),
    ]

    # The number of question marks differs per interpreter: nine on py2
    # (presumably one per UTF-8 byte of the three characters - confirm),
    # three on py3 (one per character).
    if PY2:
        expected.append(("workspace", "datacenter", "1-?????????"))
        expected.append(("workspace", "folder", '"/1-?????????/xxxxxxxxx"'))
    else:
        expected.append(("workspace", "datacenter", "1-???"))
        expected.append(("workspace", "folder", '"/1-???/xxxxxxxxx"'))

    for section, key, value in expected:
        assert doc[section][key][last].value == value

0 comments on commit 06ab1d4

Please sign in to comment.