From 8f81901af3a35ee3a7b2c055ce11b70a59d5ec00 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Apr 2024 12:56:16 +0900 Subject: [PATCH 1/2] [3.12] gh-77102: site: try utf-8 and fallback to locale encoding when reading .pth file (GH-117802) (cherry picked from commit 6dc661bc9f65e9923eafbcdbf18bcc57eebbf6a4) Co-authored-by: Inada Naoki --- Doc/library/site.rst | 4 ++ Lib/site.py | 61 +++++++++++-------- ...4-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst | 3 + 3 files changed, 42 insertions(+), 26 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst diff --git a/Doc/library/site.rst b/Doc/library/site.rst index 2dc9fb09d727e2..e52bbd32d4d493 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -74,6 +74,10 @@ with ``import`` (followed by space or tab) are executed. Limiting a code chunk to a single line is a deliberate measure to discourage putting anything more complex here. +.. versionchanged:: 3.13 + The :file:`.pth` files are now decoded by UTF-8 at first and then by the + :term:`locale encoding` if it fails. + .. index:: single: package triple: path; configuration; file diff --git a/Lib/site.py b/Lib/site.py index 924b2460d96976..b3a4916161244a 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -179,35 +179,44 @@ def addpackage(sitedir, name, known_paths): return _trace(f"Processing .pth file: {fullname!r}") try: - # locale encoding is not ideal especially on Windows. But we have used - # it for a long time. setuptools uses the locale encoding too. - f = io.TextIOWrapper(io.open_code(fullname), encoding="locale") + with io.open_code(fullname) as f: + pth_content = f.read() except OSError: return - with f: - for n, line in enumerate(f): - if line.startswith("#"): - continue - if line.strip() == "": + + try: + pth_content = pth_content.decode() + except UnicodeDecodeError: + # Fallback to locale encoding for backward compatibility. + # We will deprecate this fallback in the future. 
+ import locale pth_content = pth_content.decode(locale.getencoding()) _trace(f"Cannot read {fullname!r} as UTF-8. " + f"Using fallback encoding {locale.getencoding()!r}") + + for n, line in enumerate(pth_content.splitlines(), 1): + if line.startswith("#"): + continue + if line.strip() == "": + continue + try: + if line.startswith(("import ", "import\t")): + exec(line) continue - try: - if line.startswith(("import ", "import\t")): - exec(line) - continue - line = line.rstrip() - dir, dircase = makepath(sitedir, line) - if not dircase in known_paths and os.path.exists(dir): - sys.path.append(dir) - known_paths.add(dircase) - except Exception as exc: - print("Error processing line {:d} of {}:\n".format(n+1, fullname), - file=sys.stderr) - import traceback - for record in traceback.format_exception(exc): - for line in record.splitlines(): - print(' '+line, file=sys.stderr) - print("\nRemainder of file ignored", file=sys.stderr) - break + line = line.rstrip() + dir, dircase = makepath(sitedir, line) + if dircase not in known_paths and os.path.exists(dir): + sys.path.append(dir) + known_paths.add(dircase) + except Exception as exc: + print(f"Error processing line {n:d} of {fullname}:\n", + file=sys.stderr) + import traceback + for record in traceback.format_exception(exc): + for line in record.splitlines(): + print(' '+line, file=sys.stderr) + print("\nRemainder of file ignored", file=sys.stderr) + break if reset: known_paths = None return known_paths diff --git a/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst new file mode 100644 index 00000000000000..6f91251126dc7b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-12-17-37-11.gh-issue-77102.Mk6X_E.rst @@ -0,0 +1,3 @@ +The :mod:`site` module now decodes ``.pth`` files as UTF-8 first, and +falls back to the :term:`locale encoding` if a ``UnicodeDecodeError`` is raised. +Previously, only the locale encoding was used. 
From 5cba1d3d37f5b565034a0b52d939e5427bde1ba7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Apr 2024 13:09:00 +0900 Subject: [PATCH 2/2] remove versionchanged:: 3.13 --- Doc/library/site.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Doc/library/site.rst b/Doc/library/site.rst index e52bbd32d4d493..2dc9fb09d727e2 100644 --- a/Doc/library/site.rst +++ b/Doc/library/site.rst @@ -74,10 +74,6 @@ with ``import`` (followed by space or tab) are executed. Limiting a code chunk to a single line is a deliberate measure to discourage putting anything more complex here. -.. versionchanged:: 3.13 - The :file:`.pth` files are now decoded by UTF-8 at first and then by the - :term:`locale encoding` if it fails. - .. index:: single: package triple: path; configuration; file