python · methane · Apr 9, 2022 · Mar 20, 2022 · Mar 31, 2022 · Mar 31, 2022
diff --git a/Doc/glossary.rst b/Doc/glossary.rst
@@ -706,12 +706,13 @@ Glossary
 
    locale encoding
       On Unix, it is the encoding of the LC_CTYPE locale. It can be set with
-      ``locale.setlocale(locale.LC_CTYPE, new_locale)``.
+      :func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`.
 
-      On Windows, it is the ANSI code page (ex: ``cp1252``).
+      On Windows, it is the ANSI code page (ex: ``"cp1252"``).
 
-      ``locale.getpreferredencoding(False)`` can be used to get the locale
-      encoding.
+      On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
+
+      ``locale.getencoding()`` can be used to get the locale encoding.
 
       Python uses the :term:`filesystem encoding and error handler` to convert
       between Unicode filenames and bytes filenames.

diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst
@@ -327,16 +327,37 @@ The :mod:`locale` module defines the following exception and functions:
    is not necessary or desired, *do_setlocale* should be set to ``False``.
 
    On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always
-   return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale*
+   return ``'utf-8'``, the :term:`locale encoding` and the *do_setlocale*
    argument are ignored.
 
    The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
    locale. See also the :term:`filesystem encoding and error handler`.
 
    .. versionchanged:: 3.7
-      The function now always returns ``UTF-8`` on Android or if the
+      The function now always returns ``"utf-8"`` on Android or if the
       :ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
 
+   .. versionchanged:: 3.11
+      The function now returns ``"utf-8"`` instead of ``"UTF-8"`` on Android
+      or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
+
+
+.. function:: getencoding()
+
+   Get the current :term:`locale encoding`:
+
+   * On Android and VxWorks, return ``"utf-8"``.
+   * On Unix, return the encoding of the current :data:`LC_CTYPE` locale.
+     Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string:
+     for example, if the current LC_CTYPE locale is not supported.
+   * On Windows, return the ANSI code page.
+
+   This function is similar to
+   :func:`getpreferredencoding(False) <getpreferredencoding>` except this
+   function ignores the :ref:`Python UTF-8 Mode <utf8-mode>`.
+
+   .. versionadded:: 3.11
+
 
 .. function:: normalize(localename)
 

diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst
@@ -274,6 +274,12 @@ inspect
 * Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a
   :class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.)
 
+locale
+------
+
+* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to
+  ``locale.getpreferredencoding(False)`` but ignores the :ref:`Python UTF-8 Mode <utf8-mode>`.
+
 math
 ----
 

diff --git a/Lib/locale.py b/Lib/locale.py
@@ -28,7 +28,7 @@
            "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
            "str", "atof", "atoi", "format", "format_string", "currency",
            "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
-           "LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
+           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
 
 def _strcoll(a,b):
     """ strcoll(string,string) -> int.
@@ -637,45 +637,45 @@ def resetlocale(category=LC_ALL):
 
 
 try:
-    from _locale import _get_locale_encoding
+    from _locale import getencoding
 except ImportError:
-    def _get_locale_encoding():
+    def getencoding():
         if hasattr(sys, 'getandroidapilevel'):
             # On Android langinfo.h and CODESET are missing, and UTF-8 is
             # always used in mbstowcs() and wcstombs().
-            return 'UTF-8'
-        if sys.flags.utf8_mode:
-            return 'UTF-8'
+            return 'utf-8'
         encoding = getdefaultlocale()[1]
         if encoding is None:
-            # LANG not set, default conservatively to ASCII
-            encoding = 'ascii'
+            # LANG not set, default to UTF-8
+            encoding = 'utf-8'
         return encoding
 
 try:
     CODESET
 except NameError:
     def getpreferredencoding(do_setlocale=True):
         """Return the charset that the user is likely using."""
-        return _get_locale_encoding()
+        if sys.flags.utf8_mode:
+            return 'utf-8'
+        return getencoding()
 else:
     # On Unix, if CODESET is available, use that.
     def getpreferredencoding(do_setlocale=True):
         """Return the charset that the user is likely using,
         according to the system configuration."""
         if sys.flags.utf8_mode:
-            return 'UTF-8'
+            return 'utf-8'
 
         if not do_setlocale:
-            return _get_locale_encoding()
+            return getencoding()
 
         old_loc = setlocale(LC_CTYPE)
         try:
             try:
                 setlocale(LC_CTYPE, "")
             except Error:
                 pass
-            return _get_locale_encoding()
+            return getencoding()
         finally:
             setlocale(LC_CTYPE, old_loc)
 

diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
@@ -203,12 +203,12 @@ def test_pyio_encoding(self):
     def test_locale_getpreferredencoding(self):
         code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
         out = self.get_output('-X', 'utf8', '-c', code)
-        self.assertEqual(out, 'UTF-8 UTF-8')
+        self.assertEqual(out, 'utf-8 utf-8')
 
         for loc in POSIX_LOCALES:
             with self.subTest(LC_ALL=loc):
                 out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
-                self.assertEqual(out, 'UTF-8 UTF-8')
+                self.assertEqual(out, 'utf-8 utf-8')
 
     @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
     def test_cmd_line(self):
@@ -276,7 +276,7 @@ def test_device_encoding(self):
-        # In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
+        # In UTF-8 Mode, device_encoding(fd) returns "utf-8" if fd is a TTY
         with open(filename, encoding="utf8") as fp:
             out = fp.read().rstrip()
-        self.assertEqual(out, 'True UTF-8')
+        self.assertEqual(out, 'True utf-8')
 
 
 if __name__ == "__main__":

diff --git a/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst b/Misc/NEWS.d/next/Library/2022-04-06-11-54-53.bpo-47000.2nmAR1.rst
@@ -0,0 +1 @@
+Add :func:`locale.getencoding`.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
@@ -1145,7 +1145,13 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
         }
     }
     if (encoding == NULL && self->encoding == NULL) {
-        self->encoding = _Py_GetLocaleEncodingObject();
+        if (_PyRuntime.preconfig.utf8_mode) {
+            _Py_DECLARE_STR(utf_8, "utf-8");
+            self->encoding = Py_NewRef(&_Py_STR(utf_8));
+        }
+        else {
+            self->encoding = _Py_GetLocaleEncodingObject();
+        }
         if (self->encoding == NULL) {
             goto error;
         }

diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c
@@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
 
 
 /*[clinic input]
-_locale._get_locale_encoding
+_locale.getencoding
 
 Get the current locale encoding.
 [clinic start generated code]*/
 
 static PyObject *
-_locale__get_locale_encoding_impl(PyObject *module)
-/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
+_locale_getencoding_impl(PyObject *module)
+/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/
 {
     return _Py_GetLocaleEncodingObject();
 }
@@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
     _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
 #endif
 #endif
-    _LOCALE__GET_LOCALE_ENCODING_METHODDEF
+    _LOCALE_GETENCODING_METHODDEF
   {NULL, NULL}
 };
 

diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h
diff --git a/Python/fileutils.c b/Python/fileutils.c
@@ -93,6 +93,10 @@ _Py_device_encoding(int fd)
 
     return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
 #else
+    if (_PyRuntime.preconfig.utf8_mode) {
+        _Py_DECLARE_STR(utf_8, "utf-8");
+        return Py_NewRef(&_Py_STR(utf_8));
+    }
     return _Py_GetLocaleEncodingObject();
 #endif
 }
@@ -873,10 +877,10 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
 
 // Get the current locale encoding name:
 //
-// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
-// - Return "UTF-8" if the UTF-8 Mode is enabled
+// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
+// - The Python UTF-8 Mode is ignored: callers that honor it must check it first
 // - On Windows, return the ANSI code page (ex: "cp1250")
-// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
+// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
 // - Otherwise, return nl_langinfo(CODESET).
 //
 // Return NULL on memory allocation failure.
@@ -888,12 +892,8 @@ _Py_GetLocaleEncoding(void)
 #ifdef _Py_FORCE_UTF8_LOCALE
     // On Android langinfo.h and CODESET are missing,
     // and UTF-8 is always used in mbstowcs() and wcstombs().
-    return _PyMem_RawWcsdup(L"UTF-8");
+    return _PyMem_RawWcsdup(L"utf-8");
 #else
-    const PyPreConfig *preconfig = &_PyRuntime.preconfig;
-    if (preconfig->utf8_mode) {
-        return _PyMem_RawWcsdup(L"UTF-8");
-    }
 
 #ifdef MS_WINDOWS
     wchar_t encoding[23];
@@ -906,7 +906,7 @@ _Py_GetLocaleEncoding(void)
     if (!encoding || encoding[0] == '\0') {
         // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
         // macOS if the LC_CTYPE locale is not supported.
-        return _PyMem_RawWcsdup(L"UTF-8");
+        return _PyMem_RawWcsdup(L"utf-8");
     }
 
     wchar_t *wstr;

@@ -1779,7 +1779,13 @@ static PyStatus
 config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
                            wchar_t **locale_encoding)
 {
-    wchar_t *encoding = _Py_GetLocaleEncoding();
+    wchar_t *encoding;
+    if (preconfig->utf8_mode) {
+        encoding = _PyMem_RawWcsdup(L"utf-8");
+    }
+    else {
+        encoding = _Py_GetLocaleEncoding();
+    }
     if (encoding == NULL) {
         return _PyStatus_NO_MEMORY();
     }
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Add :func:`locale.getencoding`.
Copy link Member vstinner Apr 7, 2022 Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. You can copy/paste the Doc/whatsnew/3.11.rst entry.