From 0bf6bfcb09eb108bf7670568e5cbfa180dd05e6e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Apr 2022 18:02:57 +0900 Subject: [PATCH 1/6] Use `locale.getencoding()` instead of locale.getpreferredencoding() --- Doc/howto/curses.rst | 3 +-- Doc/library/csv.rst | 2 +- Doc/library/curses.rst | 4 ++-- Doc/library/functions.rst | 11 +++++------ Doc/library/os.rst | 6 +++--- Lib/test/libregrtest/main.py | 3 +-- Lib/test/pythoninfo.py | 2 +- Lib/test/support/__init__.py | 2 +- Lib/test/test__locale.py | 2 +- Lib/test/test_builtin.py | 2 +- Lib/test/test_cmd_line.py | 2 +- Lib/test/test_io.py | 2 +- Lib/test/test_locale.py | 8 +++++++- Lib/test/test_mimetypes.py | 5 ----- Lib/test/test_xml_etree.py | 6 +++--- Lib/xml/etree/ElementTree.py | 2 +- 16 files changed, 30 insertions(+), 32 deletions(-) diff --git a/Doc/howto/curses.rst b/Doc/howto/curses.rst index c0149ffff37716..26c4ece5ae6df4 100644 --- a/Doc/howto/curses.rst +++ b/Doc/howto/curses.rst @@ -299,8 +299,7 @@ The :meth:`~curses.window.addstr` method takes a Python string or bytestring as the value to be displayed. The contents of bytestrings are sent to the terminal as-is. Strings are encoded to bytes using the value of the window's :attr:`encoding` attribute; this defaults to -the default system encoding as returned by -:func:`locale.getpreferredencoding`. +the default system encoding as returned by :func:`locale.getencoding`. The :meth:`~curses.window.addch` methods take a character, which can be either a string of length 1, a bytestring of length 1, or an integer. diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 3a7817cfdfad87..9dec7240d9c50f 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -542,7 +542,7 @@ The corresponding simplest possible writing example is:: Since :func:`open` is used to open a CSV file for reading, the file will by default be decoded into unicode using the system default -encoding (see :func:`locale.getpreferredencoding`). 
To decode a file +encoding (see :func:`locale.getencoding`). To decode a file using a different encoding, use the ``encoding`` argument of open:: import csv diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index 37e822c0e2b207..c5800a5782d7cd 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -37,7 +37,7 @@ Linux and the BSD variants of Unix. import locale locale.setlocale(locale.LC_ALL, '') - code = locale.getpreferredencoding() + code = locale.getencoding() Then use *code* as the encoding for :meth:`str.encode` calls. @@ -924,7 +924,7 @@ the following methods and attributes: Encoding used to encode method arguments (Unicode strings and characters). The encoding attribute is inherited from the parent window when a subwindow is created, for example with :meth:`window.subwin`. By default, the locale - encoding is used (see :func:`locale.getpreferredencoding`). + encoding is used (see :func:`locale.getencoding`). .. versionadded:: 3.3 diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index eaa4d482ce3fc9..bc45a6dccc6cbd 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1123,8 +1123,8 @@ are always available. They are listed here in alphabetical order. (which on *some* Unix systems, means that *all* writes append to the end of the file regardless of the current seek position). In text mode, if *encoding* is not specified the encoding used is platform-dependent: - ``locale.getpreferredencoding(False)`` is called to get the current locale - encoding. (For reading and writing raw bytes use binary mode and leave + :func:`locale.getencoding()` is called to get the current locale encoding. + (For reading and writing raw bytes use binary mode and leave *encoding* unspecified.) The available modes are: .. _filemodes: @@ -1179,10 +1179,9 @@ are always available. They are listed here in alphabetical order. *encoding* is the name of the encoding used to decode or encode the file. 
This should only be used in text mode. The default encoding is platform - dependent (whatever :func:`locale.getpreferredencoding` returns), but any - :term:`text encoding` supported by Python - can be used. See the :mod:`codecs` module for - the list of supported encodings. + dependent (whatever :func:`locale.getencoding` returns), but any + :term:`text encoding` supported by Python can be used. + See the :mod:`codecs` module for the list of supported encodings. *errors* is an optional string that specifies how encoding and decoding errors are to be handled—this cannot be used in binary mode. diff --git a/Doc/library/os.rst b/Doc/library/os.rst index c22bf56a9f2cde..471890e74c8e58 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -105,15 +105,15 @@ of the UTF-8 encoding: * Use UTF-8 as the :term:`filesystem encoding `. -* :func:`sys.getfilesystemencoding()` returns ``'UTF-8'``. -* :func:`locale.getpreferredencoding()` returns ``'UTF-8'`` (the *do_setlocale* +* :func:`sys.getfilesystemencoding()` returns ``'utf-8'``. +* :func:`locale.getpreferredencoding()` returns ``'utf-8'`` (the *do_setlocale* argument has no effect). * :data:`sys.stdin`, :data:`sys.stdout`, and :data:`sys.stderr` all use UTF-8 as their text encoding, with the ``surrogateescape`` :ref:`error handler ` being enabled for :data:`sys.stdin` and :data:`sys.stdout` (:data:`sys.stderr` continues to use ``backslashreplace`` as it does in the default locale-aware mode) -* On Unix, :func:`os.device_encoding` returns ``'UTF-8'`` rather than the +* On Unix, :func:`os.device_encoding` returns ``'utf-8'`` rather than the device encoding. 
Note that the standard stream settings in UTF-8 mode can be overridden by diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index e7e3dde0b0a66d..0cacccfc0b5e39 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -482,8 +482,7 @@ def display_header(self): if cpu_count: print("== CPU count:", cpu_count) print("== encodings: locale=%s, FS=%s" - % (locale.getpreferredencoding(False), - sys.getfilesystemencoding())) + % (locale.getencoding(), sys.getfilesystemencoding())) def get_tests_result(self): result = [] diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index b00830c279e876..f9921081b8a3e3 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -155,7 +155,7 @@ def collect_platform(info_add): def collect_locale(info_add): import locale - info_add('locale.encoding', locale.getpreferredencoding(False)) + info_add('locale.encoding', locale.getencoding()) def collect_builtins(info_add): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index c5666d66f47825..304859a2890ebc 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1445,7 +1445,7 @@ def skip_if_buggy_ucrt_strfptime(test): global _buggy_ucrt if _buggy_ucrt is None: if(sys.platform == 'win32' and - locale.getpreferredencoding(False) == 'cp65001' and + locale.getencoding() == 'cp65001' and time.localtime().tm_zone == ''): _buggy_ucrt = True else: diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index e25c92c2c82c57..b3bc54cd551048 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -43,7 +43,7 @@ def setUpModule(): locale.setlocale(locale.LC_ALL, loc) except Error: continue - encoding = locale.getpreferredencoding(False) + encoding = locale.getencoding() try: localeconv() except Exception as err: diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index a601a524d6eb72..29039230201aca 100644 --- a/Lib/test/test_builtin.py +++ 
b/Lib/test/test_builtin.py @@ -1204,7 +1204,7 @@ def test_open_default_encoding(self): del os.environ[key] self.write_testfile() - current_locale_encoding = locale.getpreferredencoding(False) + current_locale_encoding = locale.getencoding() with warnings.catch_warnings(): warnings.simplefilter("ignore", EncodingWarning) fp = open(TESTFN, 'w') diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 84eab71f977014..e8f1964c2a40d5 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -216,7 +216,7 @@ def test_undecodable_code(self): code = ( b'import locale; ' b'print(ascii("' + undecodable + b'"), ' - b'locale.getpreferredencoding())') + b'locale.getencoding())') p = subprocess.Popen( [sys.executable, "-c", code], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 45bf81b61f4163..5528c461e58ae6 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2726,7 +2726,7 @@ def test_default_encoding(self): if key in os.environ: del os.environ[key] - current_locale_encoding = locale.getpreferredencoding(False) + current_locale_encoding = locale.getencoding() b = self.BytesIO() with warnings.catch_warnings(): warnings.simplefilter("ignore", EncodingWarning) diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index 774b0fcd333449..c4023e6d3ad95e 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -363,7 +363,7 @@ class TestEnUSCollation(BaseLocalizedTest, TestCollation): locale_type = locale.LC_ALL def setUp(self): - enc = codecs.lookup(locale.getpreferredencoding(False) or 'ascii').name + enc = codecs.lookup(locale.getencoding() or 'ascii').name if enc not in ('utf-8', 'iso8859-1', 'cp1252'): raise unittest.SkipTest('encoding not suitable') if enc != 'iso8859-1' and (sys.platform == 'darwin' or is_android or @@ -533,6 +533,12 @@ def test_defaults_UTF8(self): if orig_getlocale is not None: _locale._getdefaultlocale = orig_getlocale + def 
test_getencoding(self): + # Invoke getencoding to make sure it does not cause exceptions. + enc = locale.getencoding() + # make sure it is valid + codecs.lookup(enc) + def test_getpreferredencoding(self): # Invoke getpreferredencoding to make sure it does not cause exceptions. enc = locale.getpreferredencoding() diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 3477b18376a4f3..7e3bfb1b99f6ed 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -145,11 +145,6 @@ def test_guess_all_types(self): self.assertNotIn('.no-such-ext', all) def test_encoding(self): - getpreferredencoding = locale.getpreferredencoding - self.addCleanup(setattr, locale, 'getpreferredencoding', - getpreferredencoding) - locale.getpreferredencoding = lambda: 'ascii' - filename = support.findfile("mime.types") mimes = mimetypes.MimeTypes([filename]) exts = mimes.guess_all_extensions('application/vnd.geocube+xml', diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 60a41506d8795d..d048e155c62ad1 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -975,7 +975,7 @@ def test_tostring_xml_declaration(self): def test_tostring_xml_declaration_unicode_encoding(self): elem = ET.XML('') - preferredencoding = locale.getpreferredencoding() + preferredencoding = locale.getpreferredencoding(False) self.assertEqual( f"\n", ET.tostring(elem, encoding='unicode', xml_declaration=True) @@ -983,7 +983,7 @@ def test_tostring_xml_declaration_unicode_encoding(self): def test_tostring_xml_declaration_cases(self): elem = ET.XML('ø') - preferredencoding = locale.getpreferredencoding() + preferredencoding = locale.getpreferredencoding(False) TESTCASES = [ # (expected_retval, encoding, xml_declaration) # ... 
xml_declaration = None @@ -1048,7 +1048,7 @@ def test_tostringlist_xml_declaration(self): b"\n" ) - preferredencoding = locale.getpreferredencoding() + preferredencoding = locale.getpreferredencoding(False) stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) self.assertEqual( ''.join(stringlist), diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 5249c7ab82b84b..419dd70ff9476a 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -737,7 +737,7 @@ def write(self, file_or_filename, if enc_lower == "unicode": # Retrieve the default encoding for the xml declaration import locale - declared_encoding = locale.getpreferredencoding() + declared_encoding = locale.getpreferredencoding(False) write("\n" % ( declared_encoding,)) if method == "text": From 8cd88b9e5f065b5b167ca3647aee7a968aae56e7 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Apr 2022 19:10:13 +0900 Subject: [PATCH 2/6] Revert etree changes. --- Lib/test/test_xml_etree.py | 6 +++--- Lib/xml/etree/ElementTree.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index d048e155c62ad1..60a41506d8795d 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -975,7 +975,7 @@ def test_tostring_xml_declaration(self): def test_tostring_xml_declaration_unicode_encoding(self): elem = ET.XML('') - preferredencoding = locale.getpreferredencoding(False) + preferredencoding = locale.getpreferredencoding() self.assertEqual( f"\n", ET.tostring(elem, encoding='unicode', xml_declaration=True) @@ -983,7 +983,7 @@ def test_tostring_xml_declaration_unicode_encoding(self): def test_tostring_xml_declaration_cases(self): elem = ET.XML('ø') - preferredencoding = locale.getpreferredencoding(False) + preferredencoding = locale.getpreferredencoding() TESTCASES = [ # (expected_retval, encoding, xml_declaration) # ... 
xml_declaration = None @@ -1048,7 +1048,7 @@ def test_tostringlist_xml_declaration(self): b"\n" ) - preferredencoding = locale.getpreferredencoding(False) + preferredencoding = locale.getpreferredencoding() stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True) self.assertEqual( ''.join(stringlist), diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 419dd70ff9476a..5249c7ab82b84b 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -737,7 +737,7 @@ def write(self, file_or_filename, if enc_lower == "unicode": # Retrieve the default encoding for the xml declaration import locale - declared_encoding = locale.getpreferredencoding(False) + declared_encoding = locale.getpreferredencoding() write("\n" % ( declared_encoding,)) if method == "text": From 3005858591fa144aa2fa54316f1905904bec1974 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 20 Apr 2022 19:34:45 +0900 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: Victor Stinner --- Lib/test/pythoninfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index f9921081b8a3e3..39301e6397aab3 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -155,7 +155,7 @@ def collect_platform(info_add): def collect_locale(info_add): import locale - info_add('locale.encoding', locale.getencoding()) + info_add('locale.getencoding', locale.getencoding()) def collect_builtins(info_add): From 951875a976718f94b5ae92ee3a3b2f7edb5e166e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 21 Apr 2022 11:44:27 +0900 Subject: [PATCH 4/6] Apply suggestions from code review Co-authored-by: Victor Stinner --- Lib/test/support/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 304859a2890ebc..3b2f33979db9a8 100644 --- a/Lib/test/support/__init__.py +++ 
b/Lib/test/support/__init__.py @@ -1445,7 +1445,7 @@ def skip_if_buggy_ucrt_strfptime(test): global _buggy_ucrt if _buggy_ucrt is None: if(sys.platform == 'win32' and - locale.getencoding() == 'cp65001' and + locale.getencoding() == 'cp65001' and time.localtime().tm_zone == ''): _buggy_ucrt = True else: From 38601b07fadbc7ddeba2dad657dbbead3a99cff8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 21 Apr 2022 11:54:30 +0900 Subject: [PATCH 5/6] Apply suggested changes. --- Doc/library/curses.rst | 18 ++---------------- Lib/test/test_locale.py | 2 ++ Lib/test/test_mimetypes.py | 1 - 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/Doc/library/curses.rst b/Doc/library/curses.rst index c5800a5782d7cd..a7cc4952778011 100644 --- a/Doc/library/curses.rst +++ b/Doc/library/curses.rst @@ -27,20 +27,6 @@ Linux and the BSD variants of Unix. Whenever the documentation mentions a *character string* it can be specified as a Unicode string or a byte string. -.. note:: - - Since version 5.4, the ncurses library decides how to interpret non-ASCII data - using the ``nl_langinfo`` function. That means that you have to call - :func:`locale.setlocale` in the application and encode Unicode strings - using one of the system's available encodings. This example uses the - system's default encoding:: - - import locale - locale.setlocale(locale.LC_ALL, '') - code = locale.getencoding() - - Then use *code* as the encoding for :meth:`str.encode` calls. - .. seealso:: Module :mod:`curses.ascii` @@ -923,8 +909,8 @@ the following methods and attributes: Encoding used to encode method arguments (Unicode strings and characters). The encoding attribute is inherited from the parent window when a subwindow - is created, for example with :meth:`window.subwin`. By default, the locale - encoding is used (see :func:`locale.getencoding`). + is created, for example with :meth:`window.subwin`. + By default, the current locale encoding is used (see :func:`locale.getencoding`). ..
versionadded:: 3.3 diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py index c4023e6d3ad95e..5cb6edc52d777c 100644 --- a/Lib/test/test_locale.py +++ b/Lib/test/test_locale.py @@ -536,6 +536,8 @@ def test_defaults_UTF8(self): def test_getencoding(self): # Invoke getencoding to make sure it does not cause exceptions. enc = locale.getencoding() + self.assertIsInstance(enc, str) + self.assertNotEqual(enc, "") # make sure it is valid codecs.lookup(enc) diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 7e3bfb1b99f6ed..3c9861b5d95e58 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -1,5 +1,4 @@ import io -import locale import mimetypes import pathlib import sys From d8e8c2623f5ed9d07256d692abd2b64d267ca24d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 21 Apr 2022 21:59:16 +0900 Subject: [PATCH 6/6] Remove trailing spaces --- Lib/test/test_mimetypes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py index 3c9861b5d95e58..f2b103693a9b2c 100644 --- a/Lib/test/test_mimetypes.py +++ b/Lib/test/test_mimetypes.py @@ -32,7 +32,7 @@ def tearDownModule(): class MimeTypesTestCase(unittest.TestCase): def setUp(self): self.db = mimetypes.MimeTypes() - + def test_case_sensitivity(self): eq = self.assertEqual eq(self.db.guess_type("foobar.HTML"), self.db.guess_type("foobar.html"))