Skip to content

Commit 6773203

Browse files
authored
bpo-47000: Add locale.getencoding() (GH-32068)
1 parent cd29bd1 commit 6773203

File tree

11 files changed

+88
-46
lines changed

11 files changed

+88
-46
lines changed

Doc/glossary.rst

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -706,15 +706,15 @@ Glossary
706706

707707
locale encoding
708708
On Unix, it is the encoding of the LC_CTYPE locale. It can be set with
709-
``locale.setlocale(locale.LC_CTYPE, new_locale)``.
709+
:func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`.
710710

711-
On Windows, it is the ANSI code page (ex: ``cp1252``).
711+
On Windows, it is the ANSI code page (ex: ``"cp1252"``).
712712

713-
``locale.getpreferredencoding(False)`` can be used to get the locale
714-
encoding.
713+
On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
715714

716-
Python uses the :term:`filesystem encoding and error handler` to convert
717-
between Unicode filenames and bytes filenames.
715+
:func:`locale.getencoding` can be used to get the locale encoding.
716+
717+
See also the :term:`filesystem encoding and error handler`.
718718

719719
list
720720
A built-in Python :term:`sequence`. Despite its name it is more akin

Doc/library/locale.rst

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -327,17 +327,37 @@ The :mod:`locale` module defines the following exception and functions:
327327
is not necessary or desired, *do_setlocale* should be set to ``False``.
328328

329329
On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always
330-
return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale*
330+
return ``'utf-8'``; the :term:`locale encoding` and the *do_setlocale*
331331
argument are ignored.
332332

333333
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
334334
locale. See also the :term:`filesystem encoding and error handler`.
335335

336336
.. versionchanged:: 3.7
337-
The function now always returns ``UTF-8`` on Android or if the
337+
The function now always returns ``"utf-8"`` on Android or if the
338338
:ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
339339

340340

341+
.. function:: getencoding()
342+
343+
Get the current :term:`locale encoding`:
344+
345+
* On Android and VxWorks, return ``"utf-8"``.
346+
* On Unix, return the encoding of the current :data:`LC_CTYPE` locale.
347+
Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string:
348+
for example, if the current LC_CTYPE locale is not supported.
349+
* On Windows, return the ANSI code page.
350+
351+
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
352+
locale. See also the :term:`filesystem encoding and error handler`.
353+
354+
This function is similar to
355+
:func:`getpreferredencoding(False) <getpreferredencoding>` except this
356+
function ignores the :ref:`Python UTF-8 Mode <utf8-mode>`.
357+
358+
.. versionadded:: 3.11
359+
360+
341361
.. function:: normalize(localename)
342362

343363
Returns a normalized locale code for the given locale name. The returned locale

Doc/whatsnew/3.11.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,13 @@ inspect
285285
* Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a
286286
:class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.)
287287

288+
locale
289+
------
290+
291+
* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to
292+
``locale.getpreferredencoding(False)`` but ignores the
293+
:ref:`Python UTF-8 Mode <utf8-mode>`.
294+
288295
math
289296
----
290297

Lib/locale.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
"setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
2929
"str", "atof", "atoi", "format", "format_string", "currency",
3030
"normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
31-
"LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
31+
"LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
3232

3333
def _strcoll(a,b):
3434
""" strcoll(string,string) -> int.
@@ -637,45 +637,45 @@ def resetlocale(category=LC_ALL):
637637

638638

639639
try:
640-
from _locale import _get_locale_encoding
640+
from _locale import getencoding
641641
except ImportError:
642-
def _get_locale_encoding():
642+
def getencoding():
643643
if hasattr(sys, 'getandroidapilevel'):
644644
# On Android langinfo.h and CODESET are missing, and UTF-8 is
645645
# always used in mbstowcs() and wcstombs().
646-
return 'UTF-8'
647-
if sys.flags.utf8_mode:
648-
return 'UTF-8'
646+
return 'utf-8'
649647
encoding = getdefaultlocale()[1]
650648
if encoding is None:
651-
# LANG not set, default conservatively to ASCII
652-
encoding = 'ascii'
649+
# LANG not set, default to UTF-8
650+
encoding = 'utf-8'
653651
return encoding
654652

655653
try:
656654
CODESET
657655
except NameError:
658656
def getpreferredencoding(do_setlocale=True):
659657
"""Return the charset that the user is likely using."""
660-
return _get_locale_encoding()
658+
if sys.flags.utf8_mode:
659+
return 'utf-8'
660+
return getencoding()
661661
else:
662662
# On Unix, if CODESET is available, use that.
663663
def getpreferredencoding(do_setlocale=True):
664664
"""Return the charset that the user is likely using,
665665
according to the system configuration."""
666666
if sys.flags.utf8_mode:
667-
return 'UTF-8'
667+
return 'utf-8'
668668

669669
if not do_setlocale:
670-
return _get_locale_encoding()
670+
return getencoding()
671671

672672
old_loc = setlocale(LC_CTYPE)
673673
try:
674674
try:
675675
setlocale(LC_CTYPE, "")
676676
except Error:
677677
pass
678-
return _get_locale_encoding()
678+
return getencoding()
679679
finally:
680680
setlocale(LC_CTYPE, old_loc)
681681

Lib/test/test_utf8_mode.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,12 +203,12 @@ def test_pyio_encoding(self):
203203
def test_locale_getpreferredencoding(self):
204204
code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
205205
out = self.get_output('-X', 'utf8', '-c', code)
206-
self.assertEqual(out, 'UTF-8 UTF-8')
206+
self.assertEqual(out, 'utf-8 utf-8')
207207

208208
for loc in POSIX_LOCALES:
209209
with self.subTest(LC_ALL=loc):
210210
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
211-
self.assertEqual(out, 'UTF-8 UTF-8')
211+
self.assertEqual(out, 'utf-8 utf-8')
212212

213213
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
214214
def test_cmd_line(self):
@@ -276,7 +276,7 @@ def test_device_encoding(self):
276276
# In UTF-8 Mode, device_encoding(fd) returns "utf-8" if fd is a TTY
277277
with open(filename, encoding="utf8") as fp:
278278
out = fp.read().rstrip()
279-
self.assertEqual(out, 'True UTF-8')
279+
self.assertEqual(out, 'True utf-8')
280280

281281

282282
if __name__ == "__main__":
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Add :func:`locale.getencoding` to get the current locale encoding.
2+
It is similar to ``locale.getpreferredencoding(False)`` but ignores the
3+
:ref:`Python UTF-8 Mode <utf8-mode>`.

Modules/_io/textio.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,13 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
11451145
}
11461146
}
11471147
if (encoding == NULL && self->encoding == NULL) {
1148-
self->encoding = _Py_GetLocaleEncodingObject();
1148+
if (_PyRuntime.preconfig.utf8_mode) {
1149+
_Py_DECLARE_STR(utf_8, "utf-8");
1150+
self->encoding = Py_NewRef(&_Py_STR(utf_8));
1151+
}
1152+
else {
1153+
self->encoding = _Py_GetLocaleEncodingObject();
1154+
}
11491155
if (self->encoding == NULL) {
11501156
goto error;
11511157
}

Modules/_localemodule.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
773773

774774

775775
/*[clinic input]
776-
_locale._get_locale_encoding
776+
_locale.getencoding
777777
778778
Get the current locale encoding.
779779
[clinic start generated code]*/
780780

781781
static PyObject *
782-
_locale__get_locale_encoding_impl(PyObject *module)
783-
/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
782+
_locale_getencoding_impl(PyObject *module)
783+
/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/
784784
{
785785
return _Py_GetLocaleEncodingObject();
786786
}
@@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
811811
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
812812
#endif
813813
#endif
814-
_LOCALE__GET_LOCALE_ENCODING_METHODDEF
814+
_LOCALE_GETENCODING_METHODDEF
815815
{NULL, NULL}
816816
};
817817

Modules/clinic/_localemodule.c.h

Lines changed: 8 additions & 8 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Python/fileutils.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ _Py_device_encoding(int fd)
9393

9494
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
9595
#else
96+
if (_PyRuntime.preconfig.utf8_mode) {
97+
_Py_DECLARE_STR(utf_8, "utf-8");
98+
return Py_NewRef(&_Py_STR(utf_8));
99+
}
96100
return _Py_GetLocaleEncodingObject();
97101
#endif
98102
}
@@ -873,10 +877,10 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
873877

874878
// Get the current locale encoding name:
875879
//
876-
// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
877-
// - Return "UTF-8" if the UTF-8 Mode is enabled
880+
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
881+
// - The UTF-8 Mode is ignored: callers check preconfig.utf8_mode themselves
878882
// - On Windows, return the ANSI code page (ex: "cp1250")
879-
// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
883+
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
880884
// - Otherwise, return nl_langinfo(CODESET).
881885
//
882886
// Return NULL on memory allocation failure.
@@ -888,12 +892,8 @@ _Py_GetLocaleEncoding(void)
888892
#ifdef _Py_FORCE_UTF8_LOCALE
889893
// On Android langinfo.h and CODESET are missing,
890894
// and UTF-8 is always used in mbstowcs() and wcstombs().
891-
return _PyMem_RawWcsdup(L"UTF-8");
895+
return _PyMem_RawWcsdup(L"utf-8");
892896
#else
893-
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
894-
if (preconfig->utf8_mode) {
895-
return _PyMem_RawWcsdup(L"UTF-8");
896-
}
897897

898898
#ifdef MS_WINDOWS
899899
wchar_t encoding[23];
@@ -906,7 +906,7 @@ _Py_GetLocaleEncoding(void)
906906
if (!encoding || encoding[0] == '\0') {
907907
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
908908
// macOS if the LC_CTYPE locale is not supported.
909-
return _PyMem_RawWcsdup(L"UTF-8");
909+
return _PyMem_RawWcsdup(L"utf-8");
910910
}
911911

912912
wchar_t *wstr;

Python/initconfig.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1779,7 +1779,13 @@ static PyStatus
17791779
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
17801780
wchar_t **locale_encoding)
17811781
{
1782-
wchar_t *encoding = _Py_GetLocaleEncoding();
1782+
wchar_t *encoding;
1783+
if (preconfig->utf8_mode) {
1784+
encoding = _PyMem_RawWcsdup(L"utf-8");
1785+
}
1786+
else {
1787+
encoding = _Py_GetLocaleEncoding();
1788+
}
17831789
if (encoding == NULL) {
17841790
return _PyStatus_NO_MEMORY();
17851791
}

0 commit comments

Comments
 (0)