From 3d5bca4d1fdaefcaaaeed7415c8f468fb4a2d8e7 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 7 Jun 2024 22:25:24 +0200
Subject: [PATCH 1/7] gh-119182: Decode PyUnicode_FromFormat() format from
 UTF-8

PyUnicode_FromFormat() now decodes the format string from UTF-8 with
the "replace" error handler, instead of decoding it from ASCII.

Remove unused 'consumed' parameter of unicode_decode_utf8_writer().
---
 Doc/c-api/unicode.rst                         |  9 +++-
 Doc/whatsnew/3.14.rst                         |  4 ++
 Lib/test/test_capi/test_unicode.py            | 12 +++---
 ...-06-07-22-38-08.gh-issue-119182.P3nXBm.rst |  3 ++
 Objects/unicodeobject.c                       | 43 ++++++-------------
 5 files changed, 34 insertions(+), 37 deletions(-)
 create mode 100644 Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 7320d035bab513..1d7c8745cb63d1 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -387,7 +387,8 @@ APIs:
    arguments, calculate the size of the resulting Python Unicode string and return
    a string with the values formatted into it.  The variable arguments must be C
    types and must correspond exactly to the format characters in the *format*
-   ASCII-encoded string.
+   string. The *format* string is decoded from UTF-8 with the "replace" error
+   handler.
 
    A conversion specifier contains two or more characters and has the following
    components, which must occur in this order:
@@ -487,7 +488,8 @@ APIs:
 
       * - ``s``
         - :c:expr:`const char*` or :c:expr:`const wchar_t*`
-        - A null-terminated C character array.
+        - A null-terminated C character array. :c:expr:`const char*` is decoded
+          from UTF-8 with the "replace" error handler.
 
       * - ``p``
         - :c:expr:`const void*`
@@ -576,6 +578,9 @@ APIs:
    .. versionchanged:: 3.13
       Support for ``%T``, ``%#T``, ``%N`` and ``%#N`` formats added.
 
+   .. versionchanged:: 3.14
+      The format string is now decoded from UTF-8 instead of ASCII.
+
 
 .. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
 
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index b2dd80b64a691a..21880f872f09cf 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -261,6 +261,10 @@ New Features
 Porting to Python 3.14
 ----------------------
 
+* :c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
+  the "replace" error handler, instead of decoding it from ASCII.
+  (Contributed by Victor Stinner in :gh:`119182`.)
+
 Deprecated
 ----------
 
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index a69f817c515ba7..2b7352aabeffa1 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -384,12 +384,12 @@ def check_format(expected, format, *args):
         check_format('ascii\x7f=unicode\xe9',
                      b'ascii\x7f=%U', 'unicode\xe9')
 
-        # non-ascii format, ascii argument: ensure that PyUnicode_FromFormatV()
-        # raises an error
-        self.assertRaisesRegex(ValueError,
-            r'^PyUnicode_FromFormatV\(\) expects an ASCII-encoded format '
-            'string, got a non-ASCII byte: 0xe9$',
-            PyUnicode_FromFormat, b'unicode\xe9=%s', 'ascii')
+        # Non-ASCII format and non-ASCII arguments are both decoded
+        # from UTF-8/replace
+        check_format('unicode\xe9=\u20ac',
+                     'unicode\xe9=%s'.encode(), '\u20ac'.encode())
+        check_format('invalid\ufffd=abc\ufffd',
+                     b'invalid\xe9=%s', b'abc\xe9')
 
         # test "%c"
         check_format('\uabcd',
diff --git a/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst
new file mode 100644
index 00000000000000..71e5ae8579a800
--- /dev/null
+++ b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst	
@@ -0,0 +1,3 @@
+:c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
+the "replace" error handler, instead of decoding it from ASCII. Patch by
+Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 3b0b4173408724..cff227911298d0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -205,8 +205,7 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 static int
 unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
                            const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors,
-                           Py_ssize_t *consumed);
+                           _Py_error_handler error_handler, const char *errors);
 #ifdef Py_DEBUG
 static inline int unicode_is_finalizing(void);
 static int unicode_is_singleton(PyObject *unicode);
@@ -2402,7 +2401,7 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
 
     if (width < 0) {
         return unicode_decode_utf8_writer(writer, str, length,
-                                          _Py_ERROR_REPLACE, "replace", NULL);
+                                          _Py_ERROR_REPLACE, "replace");
     }
 
     PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
@@ -2896,28 +2895,21 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
             const char *p;
             Py_ssize_t len;
 
-            p = f;
-            do
-            {
-                if ((unsigned char)*p > 127) {
-                    PyErr_Format(PyExc_ValueError,
-                        "PyUnicode_FromFormatV() expects an ASCII-encoded format "
-                        "string, got a non-ASCII byte: 0x%02x",
-                        (unsigned char)*p);
-                    goto fail;
-                }
-                p++;
+            p = strchr(f, '%');
+            if (p != NULL) {
+                len = p - f;
             }
-            while (*p != '\0' && *p != '%');
-            len = p - f;
-
-            if (*p == '\0')
+            else {
+                len = strlen(f);
                 writer.overallocate = 0;
+            }
 
-            if (_PyUnicodeWriter_WriteASCIIString(&writer, f, len) < 0)
+            if (unicode_decode_utf8_writer(&writer, f, len,
+                                           _Py_ERROR_REPLACE, "replace") < 0) {
                 goto fail;
+            }
 
-            f = p;
+            f += len;
         }
     }
     va_end(vargs2);
@@ -4930,13 +4922,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 static int
 unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
                            const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors,
-                           Py_ssize_t *consumed)
+                           _Py_error_handler error_handler, const char *errors)
 {
     if (size == 0) {
-        if (consumed) {
-            *consumed = 0;
-        }
         return 0;
     }
 
@@ -4954,9 +4942,6 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
         writer->pos += decoded;
 
         if (decoded == size) {
-            if (consumed) {
-                *consumed = size;
-            }
             return 0;
         }
         s += decoded;
@@ -4964,7 +4949,7 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
     }
 
     return unicode_decode_utf8_impl(writer, starts, s, end,
-                                    error_handler, errors, consumed);
+                                    error_handler, errors, NULL);
 }
 
 

From 6a879156a924df2dbe078cc95c6a3b549daf6f2b Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Fri, 7 Jun 2024 22:52:41 +0200
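Subject: [PATCH 2/7] Update test_exceptions

Adjust test_format and test_err_formatunraisable: a format string
containing invalid UTF-8 bytes no longer raises ValueError; the invalid
bytes are replaced with U+FFFD.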
---
 Lib/test/test_capi/test_exceptions.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index c475b6d78d0c56..404cb39a8488d1 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -264,17 +264,22 @@ def test_format(self):
         PyErr_Format = getattr(pythonapi, name)
         PyErr_Format.argtypes = (py_object, c_char_p,)
         PyErr_Format.restype = py_object
+
         with self.assertRaises(ZeroDivisionError) as e:
             PyErr_Format(ZeroDivisionError, b'%s %d', b'error', c_int(42))
         self.assertEqual(e.exception.args, ('error 42',))
+
+        with self.assertRaises(ZeroDivisionError) as e:
+            PyErr_Format(ZeroDivisionError, b'invalid \xff')
+        self.assertEqual(e.exception.args, ('invalid \ufffd',))
+
         with self.assertRaises(ZeroDivisionError) as e:
             PyErr_Format(ZeroDivisionError, b'%s', 'помилка'.encode())
         self.assertEqual(e.exception.args, ('помилка',))
 
         with self.assertRaisesRegex(OverflowError, 'not in range'):
             PyErr_Format(ZeroDivisionError, b'%c', c_int(-1))
-        with self.assertRaisesRegex(ValueError, 'format string'):
-            PyErr_Format(ZeroDivisionError, b'\xff')
+
         self.assertRaises(SystemError, PyErr_Format, list, b'error')
         # CRASHES PyErr_Format(ZeroDivisionError, NULL)
         # CRASHES PyErr_Format(py_object(), b'error')
@@ -377,7 +382,7 @@ def test_err_formatunraisable(self):
             self.assertEqual(str(cm.unraisable.exc_value), 'oops!')
             self.assertEqual(cm.unraisable.exc_traceback.tb_lineno,
                              firstline + 15)
-            self.assertIsNone(cm.unraisable.err_msg)
+            self.assertEqual(cm.unraisable.err_msg, 'undecodable \ufffd')
             self.assertIsNone(cm.unraisable.object)
 
         with support.catch_unraisable_exception() as cm:
@@ -401,7 +406,8 @@ def test_err_formatunraisable(self):
               support.captured_stderr() as stderr):
             formatunraisable(CustomError('oops!'), b'undecodable \xff')
         lines = stderr.getvalue().splitlines()
-        self.assertEqual(lines[0], 'Traceback (most recent call last):')
+        self.assertEqual(lines[0], 'undecodable \ufffd:')
+        self.assertEqual(lines[1], 'Traceback (most recent call last):')
         self.assertEqual(lines[-1], f'{__name__}.CustomError: oops!')
 
         with (support.swap_attr(sys, 'unraisablehook', None),

From e830944769b3ee25bc251bc1b88a71f1c95d1e7c Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 12:39:13 +0200
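Subject: [PATCH 3/7] Use strict error handler

Decode the format string from UTF-8 with the "strict" error handler
instead of "replace". On invalid UTF-8, PyUnicode_FromFormatV() raises
ValueError with the UnicodeDecodeError as its context. The %s arguments
are still decoded with the "replace" error handler.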
---
 Doc/c-api/unicode.rst                              |  2 +-
 Doc/whatsnew/3.14.rst                              |  2 +-
 Lib/test/test_capi/test_exceptions.py              | 14 +++++---------
 Lib/test/test_capi/test_unicode.py                 | 13 +++++++------
 .../2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst |  2 +-
 Objects/unicodeobject.c                            |  7 ++++++-
 6 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 1d7c8745cb63d1..187c8a4b595c93 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -387,7 +387,7 @@ APIs:
    arguments, calculate the size of the resulting Python Unicode string and return
    a string with the values formatted into it.  The variable arguments must be C
    types and must correspond exactly to the format characters in the *format*
-   string. The *format* string is decoded from UTF-8 with the "replace" error
+   string. The *format* string is decoded from UTF-8 with the "strict" error
    handler.
 
    A conversion specifier contains two or more characters and has the following
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 21880f872f09cf..2bfef2564af87f 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -262,7 +262,7 @@ Porting to Python 3.14
 ----------------------
 
 * :c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
-  the "replace" error handler, instead of decoding it from ASCII.
+  the "strict" error handler, instead of decoding it from ASCII.
   (Contributed by Victor Stinner in :gh:`119182`.)
 
 Deprecated
diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py
index 404cb39a8488d1..df9ff83c2bf6b3 100644
--- a/Lib/test/test_capi/test_exceptions.py
+++ b/Lib/test/test_capi/test_exceptions.py
@@ -264,21 +264,18 @@ def test_format(self):
         PyErr_Format = getattr(pythonapi, name)
         PyErr_Format.argtypes = (py_object, c_char_p,)
         PyErr_Format.restype = py_object
-
         with self.assertRaises(ZeroDivisionError) as e:
             PyErr_Format(ZeroDivisionError, b'%s %d', b'error', c_int(42))
         self.assertEqual(e.exception.args, ('error 42',))
-
-        with self.assertRaises(ZeroDivisionError) as e:
-            PyErr_Format(ZeroDivisionError, b'invalid \xff')
-        self.assertEqual(e.exception.args, ('invalid \ufffd',))
-
         with self.assertRaises(ZeroDivisionError) as e:
             PyErr_Format(ZeroDivisionError, b'%s', 'помилка'.encode())
         self.assertEqual(e.exception.args, ('помилка',))
 
         with self.assertRaisesRegex(OverflowError, 'not in range'):
             PyErr_Format(ZeroDivisionError, b'%c', c_int(-1))
+        with self.assertRaisesRegex(ValueError, 'format string') as cm:
+            PyErr_Format(ZeroDivisionError, b'\xff')
+        self.assertIsInstance(cm.exception.__context__, UnicodeDecodeError)
 
         self.assertRaises(SystemError, PyErr_Format, list, b'error')
         # CRASHES PyErr_Format(ZeroDivisionError, NULL)
@@ -382,7 +379,7 @@ def test_err_formatunraisable(self):
             self.assertEqual(str(cm.unraisable.exc_value), 'oops!')
             self.assertEqual(cm.unraisable.exc_traceback.tb_lineno,
                              firstline + 15)
-            self.assertEqual(cm.unraisable.err_msg, 'undecodable \ufffd')
+            self.assertIsNone(cm.unraisable.err_msg)
             self.assertIsNone(cm.unraisable.object)
 
         with support.catch_unraisable_exception() as cm:
@@ -406,8 +403,7 @@ def test_err_formatunraisable(self):
               support.captured_stderr() as stderr):
             formatunraisable(CustomError('oops!'), b'undecodable \xff')
         lines = stderr.getvalue().splitlines()
-        self.assertEqual(lines[0], 'undecodable \ufffd:')
-        self.assertEqual(lines[1], 'Traceback (most recent call last):')
+        self.assertEqual(lines[0], 'Traceback (most recent call last):')
         self.assertEqual(lines[-1], f'{__name__}.CustomError: oops!')
 
         with (support.swap_attr(sys, 'unraisablehook', None),
diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 2b7352aabeffa1..874fa87cf109ea 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -384,12 +384,13 @@ def check_format(expected, format, *args):
         check_format('ascii\x7f=unicode\xe9',
                      b'ascii\x7f=%U', 'unicode\xe9')
 
-        # Non-ASCII format and non-ASCII arguments are both decoded
-        # from UTF-8/replace
-        check_format('unicode\xe9=\u20ac',
-                     'unicode\xe9=%s'.encode(), '\u20ac'.encode())
-        check_format('invalid\ufffd=abc\ufffd',
-                     b'invalid\xe9=%s', b'abc\xe9')
+        # The %s arguments are decoded from UTF-8/replace.
+        # The format string is decoded from UTF-8/strict.
+        check_format('value=utf8 \u20ac',
+                     'value=%s'.encode(), 'utf8 \u20ac'.encode())
+        with self.assertRaisesRegex(ValueError, 'format string') as cm:
+            PyUnicode_FromFormat(b'invalid format string\xff: %s', b'abc')
+        self.assertIsInstance(cm.exception.__context__, UnicodeDecodeError)
 
         # test "%c"
         check_format('\uabcd',
diff --git a/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst
index 71e5ae8579a800..c70c34ac751d49 100644
--- a/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst	
+++ b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst	
@@ -1,3 +1,3 @@
 :c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
-the "replace" error handler, instead of decoding it from ASCII. Patch by
+the "strict" error handler, instead of decoding it from ASCII. Patch by
 Victor Stinner.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index cff227911298d0..f92e6073fdfc4a 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2905,7 +2905,12 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
             }
 
             if (unicode_decode_utf8_writer(&writer, f, len,
-                                           _Py_ERROR_REPLACE, "replace") < 0) {
+                                           _Py_ERROR_STRICT, "strict") < 0) {
+                PyObject *exc = PyErr_GetRaisedException();
+                PyErr_Format(PyExc_ValueError,
+                    "PyUnicode_FromFormatV() expects a valid UTF-8-encoded "
+                    "format string, got an invalid UTF-8 string");
+                _PyErr_ChainExceptions1(exc);
                 goto fail;
             }
 

From 242e6cb583e384ca67cf417ade13ead6c6927032 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 13:10:30 +0200
Subject: [PATCH 4/7] Fix error handling

Replace PyErr_Format() with PyErr_SetString()
---
 Objects/unicodeobject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f92e6073fdfc4a..7f7cdcf34d3bbd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -2907,7 +2907,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
             if (unicode_decode_utf8_writer(&writer, f, len,
                                            _Py_ERROR_STRICT, "strict") < 0) {
                 PyObject *exc = PyErr_GetRaisedException();
-                PyErr_Format(PyExc_ValueError,
+                PyErr_SetString(PyExc_ValueError,
                     "PyUnicode_FromFormatV() expects a valid UTF-8-encoded "
                     "format string, got an invalid UTF-8 string");
                 _PyErr_ChainExceptions1(exc);

From d04269ff6ea5d129be59ef1668c93ec3fca88eae Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 14:07:49 +0200
Subject: [PATCH 5/7] Add tests on truncated UTF-8 format strings

---
 Lib/test/test_capi/test_unicode.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py
index 874fa87cf109ea..3dadf7657056b8 100644
--- a/Lib/test/test_capi/test_unicode.py
+++ b/Lib/test/test_capi/test_unicode.py
@@ -380,7 +380,7 @@ def check_format(expected, format, *args):
             text = PyUnicode_FromFormat(format, *args)
             self.assertEqual(expected, text)
 
-        # ascii format, non-ascii argument
+        # ASCII format, non-ASCII %U argument
         check_format('ascii\x7f=unicode\xe9',
                      b'ascii\x7f=%U', 'unicode\xe9')
 
@@ -392,6 +392,12 @@ def check_format(expected, format, *args):
             PyUnicode_FromFormat(b'invalid format string\xff: %s', b'abc')
         self.assertIsInstance(cm.exception.__context__, UnicodeDecodeError)
 
+        # Truncated UTF-8 format strings
+        with self.assertRaisesRegex(ValueError, 'format string'):
+            PyUnicode_FromFormat(b'truncated utf8: \xc3')
+        with self.assertRaisesRegex(ValueError, 'format string'):
+            PyUnicode_FromFormat(b'truncated utf8: \xe2\x82')
+
         # test "%c"
         check_format('\uabcd',
                      b'%c', c_int(0xabcd))

From 94da5e7680e817a7f54b6b5137fe748b66be3f5f Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 14:10:05 +0200
Subject: [PATCH 6/7] Don't mention the strict error handler

---
 Doc/c-api/unicode.rst                                        | 3 +--
 Doc/whatsnew/3.14.rst                                        | 4 ++--
 .../C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst     | 5 ++---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
index 187c8a4b595c93..0470e6b91e3cc0 100644
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -387,8 +387,7 @@ APIs:
    arguments, calculate the size of the resulting Python Unicode string and return
    a string with the values formatted into it.  The variable arguments must be C
    types and must correspond exactly to the format characters in the *format*
-   string. The *format* string is decoded from UTF-8 with the "strict" error
-   handler.
+   string. The *format* string is decoded from UTF-8.
 
    A conversion specifier contains two or more characters and has the following
    components, which must occur in this order:
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 2bfef2564af87f..8826a1120d625f 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -261,8 +261,8 @@ New Features
 Porting to Python 3.14
 ----------------------
 
-* :c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
-  the "strict" error handler, instead of decoding it from ASCII.
+* :c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8,
+  instead of ASCII.
   (Contributed by Victor Stinner in :gh:`119182`.)
 
 Deprecated
diff --git a/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst
index c70c34ac751d49..995e4633e35eef 100644
--- a/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst	
+++ b/Misc/NEWS.d/next/C API/2024-06-07-22-38-08.gh-issue-119182.P3nXBm.rst	
@@ -1,3 +1,2 @@
-:c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8 with
-the "strict" error handler, instead of decoding it from ASCII. Patch by
-Victor Stinner.
+:c:func:`PyUnicode_FromFormat` now decodes the format string from UTF-8,
+instead of ASCII. Patch by Victor Stinner.

From 89fd69ab3eaa1a24648572c65bc129b28e6187a2 Mon Sep 17 00:00:00 2001
From: Victor Stinner <vstinner@python.org>
Date: Tue, 11 Jun 2024 20:33:22 +0200
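Subject: [PATCH 7/7] Revert consumed parameter

Restore the 'consumed' parameter of unicode_decode_utf8_writer(), which
was removed earlier in this series. The callers touched here pass NULL.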
---
 Objects/unicodeobject.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 7f7cdcf34d3bbd..a6817d53e8c9a0 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -205,7 +205,8 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 static int
 unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
                            const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors);
+                           _Py_error_handler error_handler, const char *errors,
+                           Py_ssize_t *consumed);
 #ifdef Py_DEBUG
 static inline int unicode_is_finalizing(void);
 static int unicode_is_singleton(PyObject *unicode);
@@ -2401,7 +2402,7 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
 
     if (width < 0) {
         return unicode_decode_utf8_writer(writer, str, length,
-                                          _Py_ERROR_REPLACE, "replace");
+                                          _Py_ERROR_REPLACE, "replace", NULL);
     }
 
     PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
@@ -2905,7 +2906,8 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
             }
 
             if (unicode_decode_utf8_writer(&writer, f, len,
-                                           _Py_ERROR_STRICT, "strict") < 0) {
+                                           _Py_ERROR_STRICT, "strict",
+                                           NULL) < 0) {
                 PyObject *exc = PyErr_GetRaisedException();
                 PyErr_SetString(PyExc_ValueError,
                     "PyUnicode_FromFormatV() expects a valid UTF-8-encoded "
@@ -4927,9 +4929,13 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
 static int
 unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
                            const char *s, Py_ssize_t size,
-                           _Py_error_handler error_handler, const char *errors)
+                           _Py_error_handler error_handler, const char *errors,
+                           Py_ssize_t *consumed)
 {
     if (size == 0) {
+        if (consumed) {
+            *consumed = 0;
+        }
         return 0;
     }
 
@@ -4947,6 +4953,9 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
         writer->pos += decoded;
 
         if (decoded == size) {
+            if (consumed) {
+                *consumed = size;
+            }
             return 0;
         }
         s += decoded;
@@ -4954,7 +4963,7 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
     }
 
     return unicode_decode_utf8_impl(writer, starts, s, end,
-                                    error_handler, errors, NULL);
+                                    error_handler, errors, consumed);
 }