From 16be08fdf55f3bda8f272a2225a8920028bfb122 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sat, 29 Apr 2023 18:38:54 -0400
Subject: [PATCH 01/42] Very rough proof-of-concept
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

📜🤖 Added by blurb_it.
---
 ...-04-29-23-15-38.gh-issue-103997.BS3uVt.rst |   1 +
 Modules/main.c                                | 135 ++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
new file mode 100644
index 00000000000000..8949f435731e34
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
@@ -0,0 +1 @@
+Strings passed to  "-c" are now automatically dedented (common leading whitespace is removed). This allows "python -c" invocations to be indented in shell scripts without causing indentation errors.
diff --git a/Modules/main.c b/Modules/main.c
index 7edfeb3365b4c6..ef90c7d259ad93 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -229,12 +229,145 @@ pymain_import_readline(const PyConfig *config)
 }
 
 
+/*_command_dedent(wchar_t *command)                                            */
+/*{                                                                            */
+/*    // NEW CODE:                                                             */
+/*    // Remove common leading whitespace from the string                      */
+/*    // Handle dedenting the command                                          */
+/*    //                                                                       */
+/*    int cmdlen = wcslen(command);                                            */
+/*    fprintf(stderr, "COMMAND: %ls\n", command);                              */
+/*    fprintf(stderr, "cmdlen: %d\n", cmdlen);                                 */
+
+/*    int num_newlines = 0;                                                    */
+/*    int num_spaces = 0;                                                      */
+/*    for (int i = 0; i < cmdlen; i++)                                         */
+/*    {                                                                        */
+/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
+/*            num_newlines++;                                                  */
+/*        }                                                                    */
+/*    }                                                                        */
+/*    int* line_endloc = (int*) malloc(sizeof(int) * num_newlines + 1);        */
+/*    int* line_lens = (int*) malloc(sizeof(int) * num_newlines + 1);          */
+/*    int* line_nleadingspaces = (int*) malloc(sizeof(int) * num_newlines + 1);*/
+
+/*    int curr_line = 0;                                                       */
+/*    int curr_line = 0;                                                       */
+/*    for (int i = 0; i < cmdlen; i++)                                         */
+/*    {                                                                        */
+/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
+/*            num_newlines++;                                                  */
+/*        }                                                                    */
+/*    }                                                                        */
+
+/*    for (int i = 0; i < cmdlen; i++)                                         */
+/*    {                                                                        */
+/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
+/*            num_newlines++;                                                  */
+/*        }                                                                    */
+/*        if (wcsncmp(command + i, L" ", 1) == 0){                             */
+/*            num_spaces++;                                                    */
+/*        }                                                                    */
+/*        fprintf(stderr, "command[%d] = '%lc'\n", i, command[i]);             */
+/*    }                                                                        */
+/*    fprintf(stderr, "num_newlines: %d\n", num_newlines);                     */
+/*    fprintf(stderr, "num_spaces: %d\n", num_spaces);                         */
+
+/*}                                                                            */
+
+PyObject* _unicode_dedent(PyObject *unicode)
+{
+    PyObject *lines = PyUnicode_Splitlines(unicode, 1);
+    /*PyObject_Print(lines, stdout, 0);*/
+    /*fprintf(stdout, "\n");*/
+
+    Py_ssize_t num_lines = PyObject_Length(lines);
+
+    PyObject* space = PyUnicode_FromWideChar(L" ", -1);
+    PyObject* emptystr = PyUnicode_FromWideChar(L"", -1);
+    PyObject* new_unicode;
+
+    // Initialize leading space to a large value to indicate
+    // that it is uninitialized
+    Py_ssize_t effective_inf = PyObject_Length(unicode) + 1;
+    Py_ssize_t common_leading_spaces = effective_inf;
+
+    for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++)
+    {
+        PyObject* index = PyLong_FromSsize_t(line_idx);
+        PyObject* line = PyObject_GetItem(lines, index);
+        Py_ssize_t line_len = PyObject_Length(line);
+
+        PyObject* striped_line = _PyUnicode_XStrip(line, 0, space);
+        Py_ssize_t stripline_len = PyObject_Length(striped_line);
+
+        Py_ssize_t leading_spaces = line_len - stripline_len;
+
+        // On non-empty lines, see if the amount of leading whitespace is less
+        // than current value. If so, update it.
+        if (line_len > 1)
+        {
+            if (leading_spaces < common_leading_spaces) {
+                common_leading_spaces = leading_spaces;
+            }
+        }
+
+        /*fprintf(stdout, "Index: %d\n", line_idx);                             */
+        /*fprintf(stdout, "Line Length: %d\n", line_len);                       */
+        /*fprintf(stdout, "Strip Line Length: %d\n", stripline_len);            */
+        /*fprintf(stdout, "leading_spaces: %d\n", leading_spaces);              */
+        /*fprintf(stdout, "common_leading_spaces: %d\n", common_leading_spaces);*/
+        /*fprintf(stdout, "Line: ");                                            */
+        /*PyObject_Print(PyObject_Repr(line), stdout, 1);*/
+        //fprintf(stdout, "\n");
+        Py_DECREF(line);
+        Py_DECREF(index);
+    }
+
+    if (common_leading_spaces > 0 && common_leading_spaces < effective_inf){
+
+        // We found common leading whitespace, strip if off.
+        PyObject* new_lines = PyList_New(num_lines);
+        for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++)
+        {
+            PyObject* index = PyLong_FromSsize_t(line_idx);
+            PyObject* line = PyObject_GetItem(lines, index);
+            Py_ssize_t end = PyObject_Length(line);
+            Py_ssize_t start = common_leading_spaces;
+            if (end <= 1){
+                start = 0;
+            }
+            PyObject* new_line = PyUnicode_Substring(line, start, end);
+            PyList_SetItem(new_lines, line_idx, new_line);
+            Py_DECREF(line);
+            Py_DECREF(index);
+        }
+        /*PyObject_Print(PyObject_Repr(new_lines), stdout, 0);*/
+        //fprintf(stdout, "\n");
+
+        new_unicode = PyUnicode_Join(emptystr, new_lines);
+
+        Py_DECREF(unicode);
+        /*PyObject_Print(PyObject_Repr(new_unicode), stdout, 0);*/
+    }
+    else{
+       new_unicode = unicode;
+    }
+
+    //fprintf(stderr, "num_lines: %d\n", num_lines);
+    Py_DECREF(lines);
+    return new_unicode;
+}
+
+
 static int
 pymain_run_command(wchar_t *command)
 {
     PyObject *unicode, *bytes;
     int ret;
 
+    //_command_dedent(wchar_t *command)
+
     unicode = PyUnicode_FromWideChar(command, -1);
     if (unicode == NULL) {
         goto error;
@@ -244,6 +377,8 @@ pymain_run_command(wchar_t *command)
         return pymain_exit_err_print();
     }
 
+    unicode = _unicode_dedent(unicode);
+
     bytes = PyUnicode_AsUTF8String(unicode);
     Py_DECREF(unicode);
     if (bytes == NULL) {

From e88216b8f982bb2385aae3966ee96564bc802133 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sat, 29 Apr 2023 19:30:25 -0400
Subject: [PATCH 02/42] Cleanups and comments

---
 Modules/main.c | 51 +++++++++++++++++++++++++-------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index ef90c7d259ad93..122c8006aaf8ff 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -275,51 +275,43 @@ pymain_import_readline(const PyConfig *config)
 
 /*}                                                                            */
 
+
+/* Strip common leading whitespace from an input command */
 PyObject* _unicode_dedent(PyObject *unicode)
 {
-    PyObject *lines = PyUnicode_Splitlines(unicode, 1);
-    /*PyObject_Print(lines, stdout, 0);*/
-    /*fprintf(stdout, "\n");*/
-
-    Py_ssize_t num_lines = PyObject_Length(lines);
 
     PyObject* space = PyUnicode_FromWideChar(L" ", -1);
     PyObject* emptystr = PyUnicode_FromWideChar(L"", -1);
     PyObject* new_unicode;
 
-    // Initialize leading space to a large value to indicate
+    // Break up the input into lines
+    PyObject *lines = PyUnicode_Splitlines(unicode, 1);
+
+    // Init leading space to a large value to indicate
     // that it is uninitialized
     Py_ssize_t effective_inf = PyObject_Length(unicode) + 1;
     Py_ssize_t common_leading_spaces = effective_inf;
 
+    Py_ssize_t num_lines = PyObject_Length(lines);
     for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++)
     {
         PyObject* index = PyLong_FromSsize_t(line_idx);
         PyObject* line = PyObject_GetItem(lines, index);
-        Py_ssize_t line_len = PyObject_Length(line);
-
         PyObject* striped_line = _PyUnicode_XStrip(line, 0, space);
-        Py_ssize_t stripline_len = PyObject_Length(striped_line);
 
+        // Determine the number of leading whitespace on this line.
+        Py_ssize_t line_len = PyObject_Length(line);
+        Py_ssize_t stripline_len = PyObject_Length(striped_line);
         Py_ssize_t leading_spaces = line_len - stripline_len;
 
         // On non-empty lines, see if the amount of leading whitespace is less
         // than current value. If so, update it.
-        if (line_len > 1)
+        if (line_len > 1 && leading_spaces < common_leading_spaces)
         {
-            if (leading_spaces < common_leading_spaces) {
-                common_leading_spaces = leading_spaces;
-            }
+            common_leading_spaces = leading_spaces;
         }
 
-        /*fprintf(stdout, "Index: %d\n", line_idx);                             */
-        /*fprintf(stdout, "Line Length: %d\n", line_len);                       */
-        /*fprintf(stdout, "Strip Line Length: %d\n", stripline_len);            */
-        /*fprintf(stdout, "leading_spaces: %d\n", leading_spaces);              */
-        /*fprintf(stdout, "common_leading_spaces: %d\n", common_leading_spaces);*/
-        /*fprintf(stdout, "Line: ");                                            */
-        /*PyObject_Print(PyObject_Repr(line), stdout, 1);*/
-        //fprintf(stdout, "\n");
+        Py_DECREF(striped_line);
         Py_DECREF(line);
         Py_DECREF(index);
     }
@@ -339,23 +331,28 @@ PyObject* _unicode_dedent(PyObject *unicode)
             }
             PyObject* new_line = PyUnicode_Substring(line, start, end);
             PyList_SetItem(new_lines, line_idx, new_line);
+
+            Py_DECREF(new_line);
             Py_DECREF(line);
             Py_DECREF(index);
         }
-        /*PyObject_Print(PyObject_Repr(new_lines), stdout, 0);*/
-        //fprintf(stdout, "\n");
-
         new_unicode = PyUnicode_Join(emptystr, new_lines);
 
+        Py_DECREF(new_lines);
+
+        // We are going to return an updated version of "unicode" that the
+        // caller will decref, so need to decref the version we are replacing
+        // here.  This feels fragile and like the wrong way to do this.
+        // Guidance here would be appreciated.
         Py_DECREF(unicode);
-        /*PyObject_Print(PyObject_Repr(new_unicode), stdout, 0);*/
     }
     else{
        new_unicode = unicode;
     }
 
-    //fprintf(stderr, "num_lines: %d\n", num_lines);
     Py_DECREF(lines);
+    Py_DECREF(space);
+    Py_DECREF(emptystr);
     return new_unicode;
 }
 
@@ -366,6 +363,7 @@ pymain_run_command(wchar_t *command)
     PyObject *unicode, *bytes;
     int ret;
 
+    // Should the input be modified here with pure C?
     //_command_dedent(wchar_t *command)
 
     unicode = PyUnicode_FromWideChar(command, -1);
@@ -377,6 +375,7 @@ pymain_run_command(wchar_t *command)
         return pymain_exit_err_print();
     }
 
+    // Should the input be modified here with the Python C-API?
     unicode = _unicode_dedent(unicode);
 
     bytes = PyUnicode_AsUTF8String(unicode);

From bcb7c77866ec856fc59a21a020a2f7d6a0b72fd2 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sat, 29 Apr 2023 19:43:14 -0400
Subject: [PATCH 03/42] Fix bad decref, only trigger if command starts with a
 newline

---
 Modules/main.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 122c8006aaf8ff..352b355ed199e4 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -293,8 +293,7 @@ PyObject* _unicode_dedent(PyObject *unicode)
     Py_ssize_t common_leading_spaces = effective_inf;
 
     Py_ssize_t num_lines = PyObject_Length(lines);
-    for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++)
-    {
+    for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++) {
         PyObject* index = PyLong_FromSsize_t(line_idx);
         PyObject* line = PyObject_GetItem(lines, index);
         PyObject* striped_line = _PyUnicode_XStrip(line, 0, space);
@@ -306,8 +305,7 @@ PyObject* _unicode_dedent(PyObject *unicode)
 
         // On non-empty lines, see if the amount of leading whitespace is less
         // than current value. If so, update it.
-        if (line_len > 1 && leading_spaces < common_leading_spaces)
-        {
+        if (line_len > 1 && leading_spaces < common_leading_spaces) {
             common_leading_spaces = leading_spaces;
         }
 
@@ -316,23 +314,22 @@ PyObject* _unicode_dedent(PyObject *unicode)
         Py_DECREF(index);
     }
 
-    if (common_leading_spaces > 0 && common_leading_spaces < effective_inf){
+    if (common_leading_spaces > 0 && common_leading_spaces < effective_inf) {
 
         // We found common leading whitespace, strip if off.
         PyObject* new_lines = PyList_New(num_lines);
-        for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++)
-        {
+        for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++) {
             PyObject* index = PyLong_FromSsize_t(line_idx);
             PyObject* line = PyObject_GetItem(lines, index);
             Py_ssize_t end = PyObject_Length(line);
             Py_ssize_t start = common_leading_spaces;
-            if (end <= 1){
+            if (end <= 1) {
                 start = 0;
             }
             PyObject* new_line = PyUnicode_Substring(line, start, end);
             PyList_SetItem(new_lines, line_idx, new_line);
 
-            Py_DECREF(new_line);
+            //Py_DECREF(new_line);  // is it correct that we dont need to DECREF here?
             Py_DECREF(line);
             Py_DECREF(index);
         }
@@ -346,7 +343,7 @@ PyObject* _unicode_dedent(PyObject *unicode)
         // Guidance here would be appreciated.
         Py_DECREF(unicode);
     }
-    else{
+    else {
        new_unicode = unicode;
     }
 
@@ -375,8 +372,11 @@ pymain_run_command(wchar_t *command)
         return pymain_exit_err_print();
     }
 
-    // Should the input be modified here with the Python C-API?
-    unicode = _unicode_dedent(unicode);
+    // Only perform auto-dedent if the string starts with a newline
+    if (wcsncmp(command, L"\n", 1) == 0) {
+        // Should the input be modified here with the Python C-API?
+        unicode = _unicode_dedent(unicode);
+    }
 
     bytes = PyUnicode_AsUTF8String(unicode);
     Py_DECREF(unicode);

From fb8985aaad69e3c346a8b5eaf2e56871b96028be Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sat, 29 Apr 2023 22:03:50 -0400
Subject: [PATCH 04/42] wchar dedent

---
 Modules/main.c | 187 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 137 insertions(+), 50 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 352b355ed199e4..9470e0e09d3659 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -228,57 +228,134 @@ pymain_import_readline(const PyConfig *config)
     }
 }
 
+#ifdef MS_WINDOWS
+#  define WCSTOK wcstok_s
+#else
+#  define WCSTOK wcstok
+#endif
+
+//#define DEBUG_DEDENT
+
+/* Strip common leading whitespace from an input command
+ * Sort of works?
+ * */
+wchar_t* _wcs_dedent(wchar_t *command)
+{
+    // Security problem, what is the right way to do this?
+    size_t nchars = wcslen(command);
+
+#ifdef DEBUG_DEDENT
+    fprintf(stderr, "\nSTART WCS_DEDENT\n");
+    fprintf(stderr, "command: '%ls'\n", command);
+    fprintf(stderr, "nchars: %d\n", nchars);
+#endif
+
+    wchar_t *command_copy = (wchar_t *)PyMem_Malloc(nchars * sizeof(wchar_t));
+    // wcscpy has security problems, what is the workaround?
+    wcscpy(command_copy, command);
+    // fprintf(stderr, "command_copy: '%ls'\n", command_copy);
+
+    size_t num_common_leading_spaces = nchars + 1;
+
+    // I wcstok has a problem because ignores multiple instances of the
+    // delimiter which may make programs behave differently if that newline
+    // belongs to a multiline string. However, it is ok if we just want to do a
+    // first pass over the data to find the common indentation on non-empty
+    // lines.
+    wchar_t *buffer;
+    wchar_t* line = WCSTOK(command_copy, L"\n", &buffer);
+    while (line) {
+
+        // Move the pointer up to the first non-space character
+        wchar_t *first_nonspace = line;
+        while (wcsncmp(first_nonspace, L" ", 1) == 0){
+            first_nonspace++;
+        }
 
-/*_command_dedent(wchar_t *command)                                            */
-/*{                                                                            */
-/*    // NEW CODE:                                                             */
-/*    // Remove common leading whitespace from the string                      */
-/*    // Handle dedenting the command                                          */
-/*    //                                                                       */
-/*    int cmdlen = wcslen(command);                                            */
-/*    fprintf(stderr, "COMMAND: %ls\n", command);                              */
-/*    fprintf(stderr, "cmdlen: %d\n", cmdlen);                                 */
-
-/*    int num_newlines = 0;                                                    */
-/*    int num_spaces = 0;                                                      */
-/*    for (int i = 0; i < cmdlen; i++)                                         */
-/*    {                                                                        */
-/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
-/*            num_newlines++;                                                  */
-/*        }                                                                    */
-/*    }                                                                        */
-/*    int* line_endloc = (int*) malloc(sizeof(int) * num_newlines + 1);        */
-/*    int* line_lens = (int*) malloc(sizeof(int) * num_newlines + 1);          */
-/*    int* line_nleadingspaces = (int*) malloc(sizeof(int) * num_newlines + 1);*/
-
-/*    int curr_line = 0;                                                       */
-/*    int curr_line = 0;                                                       */
-/*    for (int i = 0; i < cmdlen; i++)                                         */
-/*    {                                                                        */
-/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
-/*            num_newlines++;                                                  */
-/*        }                                                                    */
-/*    }                                                                        */
-
-/*    for (int i = 0; i < cmdlen; i++)                                         */
-/*    {                                                                        */
-/*        if (wcsncmp(command + i, L"\n", 1) == 0){                            */
-/*            num_newlines++;                                                  */
-/*        }                                                                    */
-/*        if (wcsncmp(command + i, L" ", 1) == 0){                             */
-/*            num_spaces++;                                                    */
-/*        }                                                                    */
-/*        fprintf(stderr, "command[%d] = '%lc'\n", i, command[i]);             */
-/*    }                                                                        */
-/*    fprintf(stderr, "num_newlines: %d\n", num_newlines);                     */
-/*    fprintf(stderr, "num_spaces: %d\n", num_spaces);                         */
-
-/*}                                                                            */
+        // Only check lines that contain non-whitespace characters
+        if (wcsncmp(first_nonspace, L"\0", 1)) {
+
+            size_t num_leading_spaces = first_nonspace - line;
+            if (num_leading_spaces < num_common_leading_spaces) {
+                num_common_leading_spaces = num_leading_spaces;
+            }
+#ifdef DEBUG_DEDENT
+         fprintf(stderr, "==========\n");
+         fprintf(stderr, "line: '%ls'\n", line);
+         fprintf(stderr, "first_nonspace: '%ls'\n", first_nonspace);
+         fprintf(stderr, "num_common_leading_spaces: '%d'\n", num_common_leading_spaces);
+         fprintf(stderr, "num_leading_spaces: '%d'\n", num_leading_spaces);
+#endif
+        }
+        line = WCSTOK(NULL, L"\n", &buffer);
+    }
+    PyMem_Free(command_copy);
+
+    wchar_t *end_ptr = command + nchars;
+    wchar_t *curr_line_ptr = command;
+    wchar_t *next_line_ptr;
+    wchar_t *new_start_loc;
+    size_t new_line_len;
+
+    // What is the correct way to ensure this is null terminated
+    wchar_t *new_command = (wchar_t *)PyMem_Malloc((nchars + 1) * sizeof(wchar_t));
+    wmemset(new_command, NULL, nchars + 1);
+    wchar_t *curr_dst = new_command;
+
+    while (curr_line_ptr != end_ptr) {
+        // Find the end of the current line.
+        next_line_ptr = wcsstr(curr_line_ptr, L"\n");
+        if (next_line_ptr == NULL) {
+            next_line_ptr = end_ptr;
+        }
+        else {
+            next_line_ptr++;
+        }
+
+        size_t line_len = next_line_ptr - curr_line_ptr;
+
+        if (line_len > num_common_leading_spaces){
+            new_start_loc = curr_line_ptr + num_common_leading_spaces;
+            new_line_len = line_len - num_common_leading_spaces;
+        }
+        else {
+            new_start_loc = curr_line_ptr;
+            new_line_len = line_len;
+        }
+
+        int offset = curr_line_ptr - command;
+
+#ifdef DEBUG_DEDENT
+        fprintf(stderr, "line_len: '%d'\n", line_len);
+        fprintf(stderr, "offset: '%d'\n", offset);
+#endif
+
+        // Copy the part of the line we want to keep to the new location
+        wcsncpy(curr_dst, new_start_loc, new_line_len);
+        curr_dst += new_line_len;
+
+        curr_line_ptr = next_line_ptr;
+    }
+
+    // FIXME: I'm sure this is not the memory safe way to do this, but I dont
+    // know what is.
+    command = new_command;
+
+#ifdef DEBUG_DEDENT
+    fprintf(stderr, "new_command: '%ls'\n", new_command);
+    fprintf(stderr, "\nEND WCS_DEDENT\n");
+#endif
+    return command;
+
+}
 
 
 /* Strip common leading whitespace from an input command */
 PyObject* _unicode_dedent(PyObject *unicode)
 {
+    /*fprintf(stderr, "\nSTART unicode dedent\n");      */
+    /*PyObject_Print(PyObject_Repr(unicode), stderr, 0);*/
+    /*fprintf(stderr, "\n");                            */
 
     PyObject* space = PyUnicode_FromWideChar(L" ", -1);
     PyObject* emptystr = PyUnicode_FromWideChar(L"", -1);
@@ -350,6 +427,11 @@ PyObject* _unicode_dedent(PyObject *unicode)
     Py_DECREF(lines);
     Py_DECREF(space);
     Py_DECREF(emptystr);
+
+    /*PyObject_Print(PyObject_Repr(new_unicode), stderr, 0);*/
+    /*fprintf(stderr, "\nEND unicode dedent\n");            */
+    /*fprintf(stderr, "\n");                                */
+
     return new_unicode;
 }
 
@@ -361,7 +443,12 @@ pymain_run_command(wchar_t *command)
     int ret;
 
     // Should the input be modified here with pure C?
-    //_command_dedent(wchar_t *command)
+    if (wcsncmp(command, L"\n", 1) == 0) {
+        command = _wcs_dedent(command);
+        if (command == NULL) {
+            goto error;
+        }
+    }
 
     unicode = PyUnicode_FromWideChar(command, -1);
     if (unicode == NULL) {
@@ -373,10 +460,10 @@ pymain_run_command(wchar_t *command)
     }
 
     // Only perform auto-dedent if the string starts with a newline
-    if (wcsncmp(command, L"\n", 1) == 0) {
-        // Should the input be modified here with the Python C-API?
-        unicode = _unicode_dedent(unicode);
-    }
+    /*if (wcsncmp(command, L"\n", 1) == 0) {                         */
+    /*    // Should the input be modified here with the Python C-API?*/
+    /*    unicode = _unicode_dedent(unicode);                        */
+    /*}                                                              */
 
     bytes = PyUnicode_AsUTF8String(unicode);
     Py_DECREF(unicode);

From 26f27a84cec932a1174cd04b37af6110c820c287 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sat, 29 Apr 2023 22:24:25 -0400
Subject: [PATCH 05/42] tweaks

---
 Modules/main.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 9470e0e09d3659..e395f26013568d 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -250,6 +250,10 @@ wchar_t* _wcs_dedent(wchar_t *command)
     fprintf(stderr, "nchars: %d\n", nchars);
 #endif
 
+    // Step 1: Find N = the common number leading whitespace chars
+
+    // Create a copy of the command so we can use the descructive WCSTOK to
+    // tokenize it.
     wchar_t *command_copy = (wchar_t *)PyMem_Malloc(nchars * sizeof(wchar_t));
     // wcscpy has security problems, what is the workaround?
     wcscpy(command_copy, command);
@@ -283,8 +287,8 @@ wchar_t* _wcs_dedent(wchar_t *command)
          fprintf(stderr, "==========\n");
          fprintf(stderr, "line: '%ls'\n", line);
          fprintf(stderr, "first_nonspace: '%ls'\n", first_nonspace);
-         fprintf(stderr, "num_common_leading_spaces: '%d'\n", num_common_leading_spaces);
-         fprintf(stderr, "num_leading_spaces: '%d'\n", num_leading_spaces);
+         fprintf(stderr, "num_common_leading_spaces: '%zu'\n", num_common_leading_spaces);
+         fprintf(stderr, "num_leading_spaces: '%zu'\n", num_leading_spaces);
 #endif
         }
         line = WCSTOK(NULL, L"\n", &buffer);
@@ -297,9 +301,15 @@ wchar_t* _wcs_dedent(wchar_t *command)
     wchar_t *new_start_loc;
     size_t new_line_len;
 
+    // Step 2: Remove N leading whitespace chars from each line We do this by
+    // creating a new string and copying over each line one at a time and not
+    // copying over the leading whitespace
+
     // What is the correct way to ensure this is null terminated
+    // Is it ok that this is overallocated?
+    // Would we want to mutate the input pointer instead?
     wchar_t *new_command = (wchar_t *)PyMem_Malloc((nchars + 1) * sizeof(wchar_t));
-    wmemset(new_command, NULL, nchars + 1);
+    //wmemset(new_command, NULL, nchars + 1);
     wchar_t *curr_dst = new_command;
 
     while (curr_line_ptr != end_ptr) {
@@ -323,11 +333,8 @@ wchar_t* _wcs_dedent(wchar_t *command)
             new_line_len = line_len;
         }
 
-        int offset = curr_line_ptr - command;
-
 #ifdef DEBUG_DEDENT
-        fprintf(stderr, "line_len: '%d'\n", line_len);
-        fprintf(stderr, "offset: '%d'\n", offset);
+        fprintf(stderr, "line_len: '%zu'\n", line_len);
 #endif
 
         // Copy the part of the line we want to keep to the new location
@@ -336,6 +343,8 @@ wchar_t* _wcs_dedent(wchar_t *command)
 
         curr_line_ptr = next_line_ptr;
     }
+    // null terminate the string (is this sufficient?)
+    (*curr_dst) = NULL;
 
     // FIXME: I'm sure this is not the memory safe way to do this, but I dont
     // know what is.

From 417eff8e6f7a96390c703a8d4ba601356118ea65 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 19:47:48 -0400
Subject: [PATCH 06/42] Use new char* implementation

---
 Modules/main.c | 216 +++++++++++++------------------------------------
 1 file changed, 54 insertions(+), 162 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index e395f26013568d..8a86b395bb77d4 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -228,93 +228,79 @@ pymain_import_readline(const PyConfig *config)
     }
 }
 
-#ifdef MS_WINDOWS
-#  define WCSTOK wcstok_s
-#else
-#  define WCSTOK wcstok
-#endif
 
-//#define DEBUG_DEDENT
+// #define DEBUG_DEDENT
 
-/* Strip common leading whitespace from an input command
- * Sort of works?
- * */
-wchar_t* _wcs_dedent(wchar_t *command)
-{
-    // Security problem, what is the right way to do this?
-    size_t nchars = wcslen(command);
+/* Strip common leading whitespace utf encoded string
+ * returns a new PyBytes object that must be deallocated
+ */
+PyObject* _pybytes_dedent(PyObject *bytes){
+    char *input_data = PyBytes_AsString(bytes);
+
+    // Security problem? what is the right way to do this?
+    Py_ssize_t nchars = strlen(input_data);
+
+    // Allocate new data for the output
+    PyBytesObject *new_bytes = PyBytes_FromStringAndSize(NULL, nchars);
+    if (new_bytes == NULL) {
+        return NULL;
+    }
+    char *new_data = PyBytes_AsString(new_bytes);
 
 #ifdef DEBUG_DEDENT
-    fprintf(stderr, "\nSTART WCS_DEDENT\n");
-    fprintf(stderr, "command: '%ls'\n", command);
+    fprintf(stderr, "\nSTART DEDENT\n");
+    fprintf(stderr, "input_data: '%s'\n", input_data);
     fprintf(stderr, "nchars: %d\n", nchars);
 #endif
 
     // Step 1: Find N = the common number leading whitespace chars
 
-    // Create a copy of the command so we can use the descructive WCSTOK to
-    // tokenize it.
-    wchar_t *command_copy = (wchar_t *)PyMem_Malloc(nchars * sizeof(wchar_t));
-    // wcscpy has security problems, what is the workaround?
-    wcscpy(command_copy, command);
-    // fprintf(stderr, "command_copy: '%ls'\n", command_copy);
-
-    size_t num_common_leading_spaces = nchars + 1;
-
-    // I wcstok has a problem because ignores multiple instances of the
-    // delimiter which may make programs behave differently if that newline
-    // belongs to a multiline string. However, it is ok if we just want to do a
-    // first pass over the data to find the common indentation on non-empty
-    // lines.
-    wchar_t *buffer;
-    wchar_t* line = WCSTOK(command_copy, L"\n", &buffer);
-    while (line) {
+    // Use the output array as a temporary buffer (because we haven't populated it yet)
+    // so we can use the descructive strtok to tokenize the input.
+    strcpy(new_data, input_data);
+
+    Py_ssize_t num_common_leading_spaces = nchars + 1;
 
+    // Count the number of leading spaces on each line
+    char *line = strtok(new_data, "\n");
+    while (line) {
         // Move the pointer up to the first non-space character
-        wchar_t *first_nonspace = line;
-        while (wcsncmp(first_nonspace, L" ", 1) == 0){
+        char *first_nonspace = line;
+        while (strncmp(first_nonspace, " ", 1) == 0){
             first_nonspace++;
         }
-
         // Only check lines that contain non-whitespace characters
-        if (wcsncmp(first_nonspace, L"\0", 1)) {
+        if (strncmp(first_nonspace, "\0", 1)) {
 
-            size_t num_leading_spaces = first_nonspace - line;
+            Py_ssize_t num_leading_spaces = first_nonspace - line;
             if (num_leading_spaces < num_common_leading_spaces) {
                 num_common_leading_spaces = num_leading_spaces;
             }
 #ifdef DEBUG_DEDENT
          fprintf(stderr, "==========\n");
-         fprintf(stderr, "line: '%ls'\n", line);
-         fprintf(stderr, "first_nonspace: '%ls'\n", first_nonspace);
+         fprintf(stderr, "line: '%s'\n", line);
+         fprintf(stderr, "first_nonspace: '%s'\n", first_nonspace);
          fprintf(stderr, "num_common_leading_spaces: '%zu'\n", num_common_leading_spaces);
          fprintf(stderr, "num_leading_spaces: '%zu'\n", num_leading_spaces);
 #endif
         }
-        line = WCSTOK(NULL, L"\n", &buffer);
+        line = strtok(NULL, "\n");
     }
-    PyMem_Free(command_copy);
 
-    wchar_t *end_ptr = command + nchars;
-    wchar_t *curr_line_ptr = command;
-    wchar_t *next_line_ptr;
-    wchar_t *new_start_loc;
-    size_t new_line_len;
+    char *end_ptr = input_data + nchars;
+    char *curr_line_ptr = input_data;
+    char *next_line_ptr;
+    char *new_start_loc;
+    Py_ssize_t new_line_len;
 
     // Step 2: Remove N leading whitespace chars from each line We do this by
     // creating a new string and copying over each line one at a time and not
     // copying over the leading whitespace
 
-    // What is the correct way to ensure this is null terminated
-    // Is it ok that this is overallocated?
-    // Would we want to mutate the input pointer instead?
-    wchar_t *new_command = (wchar_t *)PyMem_Malloc((nchars + 1) * sizeof(wchar_t));
-    //wmemset(new_command, NULL, nchars + 1);
-    wchar_t *curr_dst = new_command;
-
+    char *curr_dst = new_data;
     while (curr_line_ptr != end_ptr) {
         // Find the end of the current line.
-        next_line_ptr = wcsstr(curr_line_ptr, L"\n");
+        next_line_ptr = strstr(curr_line_ptr, "\n");
         if (next_line_ptr == NULL) {
             next_line_ptr = end_ptr;
         }
@@ -322,7 +308,7 @@ wchar_t* _wcs_dedent(wchar_t *command)
             next_line_ptr++;
         }
 
-        size_t line_len = next_line_ptr - curr_line_ptr;
+        Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
 
         if (line_len > num_common_leading_spaces){
             new_start_loc = curr_line_ptr + num_common_leading_spaces;
@@ -336,9 +322,8 @@ wchar_t* _wcs_dedent(wchar_t *command)
 #ifdef DEBUG_DEDENT
         fprintf(stderr, "line_len: '%zu'\n", line_len);
 #endif
-
         // Copy the part of the line we want to keep to the new location
-        wcsncpy(curr_dst, new_start_loc, new_line_len);
+        strncpy(curr_dst, new_start_loc, new_line_len);
         curr_dst += new_line_len;
 
         curr_line_ptr = next_line_ptr;
@@ -346,102 +331,13 @@ wchar_t* _wcs_dedent(wchar_t *command)
     // null terminate the string (is this sufficient?)
     (*curr_dst) = NULL;
 
-    // FIXME: I'm sure this is not the memory safe way to do this, but I dont
-    // know what is.
-    command = new_command;
-
 #ifdef DEBUG_DEDENT
-    fprintf(stderr, "new_command: '%ls'\n", new_command);
+    fprintf(stderr, "new_data: '%s'\n", new_data);
     fprintf(stderr, "\nEND WCS_DEDENT\n");
 #endif
-    return command;
-
-}
-
-
-/* Strip common leading whitespace from an input command */
-PyObject* _unicode_dedent(PyObject *unicode)
-{
-    /*fprintf(stderr, "\nSTART unicode dedent\n");      */
-    /*PyObject_Print(PyObject_Repr(unicode), stderr, 0);*/
-    /*fprintf(stderr, "\n");                            */
-
-    PyObject* space = PyUnicode_FromWideChar(L" ", -1);
-    PyObject* emptystr = PyUnicode_FromWideChar(L"", -1);
-    PyObject* new_unicode;
-
-    // Break up the input into lines
-    PyObject *lines = PyUnicode_Splitlines(unicode, 1);
-
-    // Init leading space to a large value to indicate
-    // that it is uninitialized
-    Py_ssize_t effective_inf = PyObject_Length(unicode) + 1;
-    Py_ssize_t common_leading_spaces = effective_inf;
-
-    Py_ssize_t num_lines = PyObject_Length(lines);
-    for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++) {
-        PyObject* index = PyLong_FromSsize_t(line_idx);
-        PyObject* line = PyObject_GetItem(lines, index);
-        PyObject* striped_line = _PyUnicode_XStrip(line, 0, space);
-
-        // Determine the number of leading whitespace on this line.
-        Py_ssize_t line_len = PyObject_Length(line);
-        Py_ssize_t stripline_len = PyObject_Length(striped_line);
-        Py_ssize_t leading_spaces = line_len - stripline_len;
-
-        // On non-empty lines, see if the amount of leading whitespace is less
-        // than current value. If so, update it.
-        if (line_len > 1 && leading_spaces < common_leading_spaces) {
-            common_leading_spaces = leading_spaces;
-        }
-
-        Py_DECREF(striped_line);
-        Py_DECREF(line);
-        Py_DECREF(index);
-    }
+    return new_bytes;
 
-    if (common_leading_spaces > 0 && common_leading_spaces < effective_inf) {
 
-        // We found common leading whitespace, strip if off.
-        PyObject* new_lines = PyList_New(num_lines);
-        for (Py_ssize_t line_idx = 0; line_idx < num_lines; line_idx ++) {
-            PyObject* index = PyLong_FromSsize_t(line_idx);
-            PyObject* line = PyObject_GetItem(lines, index);
-            Py_ssize_t end = PyObject_Length(line);
-            Py_ssize_t start = common_leading_spaces;
-            if (end <= 1) {
-                start = 0;
-            }
-            PyObject* new_line = PyUnicode_Substring(line, start, end);
-            PyList_SetItem(new_lines, line_idx, new_line);
-
-            //Py_DECREF(new_line);  // is it correct that we dont need to DECREF here?
-            Py_DECREF(line);
-            Py_DECREF(index);
-        }
-        new_unicode = PyUnicode_Join(emptystr, new_lines);
-
-        Py_DECREF(new_lines);
-
-        // We are going to return an updated version of "unicode" that the
-        // caller will decref, so need to decref the version we are replacing
-        // here.  This feels fragile and like the wrong way to do this.
-        // Guidance here would be appreciated.
-        Py_DECREF(unicode);
-    }
-    else {
-       new_unicode = unicode;
-    }
-
-    Py_DECREF(lines);
-    Py_DECREF(space);
-    Py_DECREF(emptystr);
-
-    /*PyObject_Print(PyObject_Repr(new_unicode), stderr, 0);*/
-    /*fprintf(stderr, "\nEND unicode dedent\n");            */
-    /*fprintf(stderr, "\n");                                */
-
-    return new_unicode;
 }
 
 
@@ -451,14 +347,6 @@ pymain_run_command(wchar_t *command)
     PyObject *unicode, *bytes;
     int ret;
 
-    // Should the input be modified here with pure C?
-    if (wcsncmp(command, L"\n", 1) == 0) {
-        command = _wcs_dedent(command);
-        if (command == NULL) {
-            goto error;
-        }
-    }
-
     unicode = PyUnicode_FromWideChar(command, -1);
     if (unicode == NULL) {
         goto error;
@@ -468,18 +356,22 @@ pymain_run_command(wchar_t *command)
         return pymain_exit_err_print();
     }
 
-    // Only perform auto-dedent if the string starts with a newline
-    /*if (wcsncmp(command, L"\n", 1) == 0) {                         */
-    /*    // Should the input be modified here with the Python C-API?*/
-    /*    unicode = _unicode_dedent(unicode);                        */
-    /*}                                                              */
-
     bytes = PyUnicode_AsUTF8String(unicode);
     Py_DECREF(unicode);
     if (bytes == NULL) {
         goto error;
     }
 
+    // Only perform auto-dedent if the string starts with a newline
+    if (strncmp(PyBytes_AsString(bytes), "\n", 1) == 0) {
+        PyObject *new_bytes = _pybytes_dedent(bytes);
+        if (new_bytes == NULL) {
+            goto error;
+        }
+        Py_DECREF(bytes);
+        bytes = new_bytes;
+    }
+
     PyCompilerFlags cf = _PyCompilerFlags_INIT;
     cf.cf_flags |= PyCF_IGNORE_COOKIE;
     ret = PyRun_SimpleStringFlags(PyBytes_AsString(bytes), &cf);

From 924e0a6897f452ac1cf3161ae2d9202e2acaa992 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 19:48:55 -0400
Subject: [PATCH 07/42] Rename function

---
 Modules/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 8a86b395bb77d4..55ea0025520dac 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -234,7 +234,7 @@ pymain_import_readline(const PyConfig *config)
 /* Strip common leading whitespace utf encoded string
  * returns a new PyBytes object that must be deallocated
  */
-PyObject* _pybytes_dedent(PyObject *bytes){
+PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     char *input_data = PyBytes_AsString(bytes);
 
     // Security problem? what is the right way to do this?
@@ -364,7 +364,7 @@ pymain_run_command(wchar_t *command)
 
     // Only perform auto-dedent if the string starts with a newline
     if (strncmp(PyBytes_AsString(bytes), "\n", 1) == 0) {
-        PyObject *new_bytes = _pybytes_dedent(bytes);
+        PyObject *new_bytes = _utf_8_bytes_dedent(bytes);
         if (new_bytes == NULL) {
             goto error;
         }

From 9f956726057b98cbe3f6e40a1620b56ab62de912 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 19:59:07 -0400
Subject: [PATCH 08/42] tweaks

---
 Modules/main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 55ea0025520dac..f69c322d4a5379 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -293,12 +293,12 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     char *new_start_loc;
     Py_ssize_t new_line_len;
 
-    // Step 2: Remove N leading whitespace chars from each line We do this by
-    // creating a new string and copying over each line one at a time and not
-    // copying over the leading whitespace
+    // Step 2: Remove N leading whitespace chars from each line by copying data
+    // (except leading spaces) from the input buffer to the output buffer one
+    // line at a time.
 
     char *curr_dst = new_data;
-    while (curr_line_ptr != end_ptr) {
+    while (curr_line_ptr < end_ptr) {
         // Find the end of the current line.
         next_line_ptr = strstr(curr_line_ptr, "\n");
         if (next_line_ptr == NULL) {

From 3f4a78bf047ab45f452bf89a4d27ae4bdb64e171 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 20:05:34 -0400
Subject: [PATCH 09/42] More tweaks

---
 Modules/main.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index f69c322d4a5379..9ba8de056639f3 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -231,17 +231,15 @@ pymain_import_readline(const PyConfig *config)
 
 // #define DEBUG_DEDENT
 
-/* Strip common leading whitespace utf encoded string
- * returns a new PyBytes object that must be deallocated
- */
+/* Strip common leading whitespace utf encoded string */
 PyObject* _utf_8_bytes_dedent(PyObject *bytes){
-    char *input_data = PyBytes_AsString(bytes);
+    char *input_data;
+    Py_ssize_t nchars;
 
-    // Security problem? what is the right way to do this?
-    Py_ssize_t nchars = strlen(input_data);
+    PyBytes_AsStringAndSize(bytes, &input_data, &nchars);
 
-    // Allocate new data for the output
-    PyBytesObject *new_bytes = PyBytes_FromStringAndSize(NULL, nchars);
+    // Allocate new data for the output as a copy of the input
+    PyBytesObject *new_bytes = PyBytes_FromStringAndSize(input_data, nchars);
     if (new_bytes == NULL) {
         return NULL;
     }
@@ -255,13 +253,11 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
 
     // Step 1: Find N = the common number leading whitespace chars
 
-    // Use the output array as a temporary buffer (because we haven't populated it yet)
     // so we can use the descructive strtok to tokenize the input.
-    strcpy(new_data, input_data);
-
     Py_ssize_t num_common_leading_spaces = nchars + 1;
 
     // Count the number of leading spaces on each line
+    // Use the output array as a temporary buffer (we will repopulate it later)
     char *line = strtok(new_data, "\n");
     while (line) {
         // Move the pointer up to the first non-space character

From 97f2079c46b68bd835f715435058e72bed891d23 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 22:08:23 -0400
Subject: [PATCH 10/42] Replace strncmp with direct char comparison

---
 Modules/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 9ba8de056639f3..27295a8cb0dfcb 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -262,11 +262,11 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     while (line) {
         // Move the pointer up to the first non-space character
         char *first_nonspace = line;
-        while (strncmp(first_nonspace, " ", 1) == 0){
+        while (*first_nonspace == ' '){
             first_nonspace++;
         }
         // Only check lines that contain non-whitespace characters
-        if (strncmp(first_nonspace, "\0", 1)) {
+        if (*first_nonspace != '\0') {
 
             Py_ssize_t num_leading_spaces = first_nonspace - line;
             if (num_leading_spaces < num_common_leading_spaces) {
@@ -359,7 +359,7 @@ pymain_run_command(wchar_t *command)
     }
 
     // Only perform auto-dedent if the string starts with a newline
-    if (strncmp(PyBytes_AsString(bytes), "\n", 1) == 0) {
+    if (*PyBytes_AsString(bytes) == '\n') {
         PyObject *new_bytes = _utf_8_bytes_dedent(bytes);
         if (new_bytes == NULL) {
             goto error;

From 04435eb4a79ae5e3940cfa804f1011287b901f68 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 22:12:12 -0400
Subject: [PATCH 11/42] Remove debug code

---
 Modules/main.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 27295a8cb0dfcb..4e60b5e1e2bee0 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -229,8 +229,6 @@ pymain_import_readline(const PyConfig *config)
 }
 
 
-// #define DEBUG_DEDENT
-
 /* Strip common leading whitespace utf encoded string */
 PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     char *input_data;
@@ -245,12 +243,6 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     }
     char *new_data = PyBytes_AsString(new_bytes);
 
-#ifdef DEBUG_DEDENT
-    fprintf(stderr, "\nSTART DEDENT\n");
-    fprintf(stderr, "input_data: '%s'\n", input_data);
-    fprintf(stderr, "nchars: %d\n", nchars);
-#endif
-
     // Step 1: Find N = the common number leading whitespace chars
 
     // so we can use the descructive strtok to tokenize the input.
@@ -272,13 +264,6 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
             if (num_leading_spaces < num_common_leading_spaces) {
                 num_common_leading_spaces = num_leading_spaces;
             }
-#ifdef DEBUG_DEDENT
-         fprintf(stderr, "==========\n");
-         fprintf(stderr, "line: '%s'\n", line);
-         fprintf(stderr, "first_nonspace: '%s'\n", first_nonspace);
-         fprintf(stderr, "num_common_leading_spaces: '%zu'\n", num_common_leading_spaces);
-         fprintf(stderr, "num_leading_spaces: '%zu'\n", num_leading_spaces);
-#endif
         }
         line = strtok(NULL, "\n");
     }
@@ -315,9 +300,6 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
             new_line_len = line_len;
         }
 
-#ifdef DEBUG_DEDENT
-        fprintf(stderr, "line_len: '%zu'\n", line_len);
-#endif
         // Copy the part of the line we want to keep to the new location
         strncpy(curr_dst, new_start_loc, new_line_len);
         curr_dst += new_line_len;
@@ -327,13 +309,7 @@ PyObject* _utf_8_bytes_dedent(PyObject *bytes){
     // null terminate the string (is this sufficient?)
     (*curr_dst) = NULL;
 
-#ifdef DEBUG_DEDENT
-    fprintf(stderr, "new_data: '%s'\n", new_data);
-    fprintf(stderr, "\nEND WCS_DEDENT\n");
-#endif
     return new_bytes;
-
-
 }
 
 

From 4c4eca9bc6228b6eebfdca2d30fa94396de6c91a Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Sun, 30 Apr 2023 23:44:52 -0400
Subject: [PATCH 12/42] Made new function static

---
 Modules/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Modules/main.c b/Modules/main.c
index 4e60b5e1e2bee0..8959ee297f7e4f 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -230,7 +230,8 @@ pymain_import_readline(const PyConfig *config)
 
 
 /* Strip common leading whitespace utf encoded string */
-PyObject* _utf_8_bytes_dedent(PyObject *bytes){
+static PyObject*
+_utf_8_bytes_dedent(PyObject *bytes){
     char *input_data;
     Py_ssize_t nchars;
 

From f9c969be644eda481c15595566cdb487127c0345 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Mon, 1 May 2023 01:23:05 -0400
Subject: [PATCH 13/42] Handwritten char iter and _PyBytesWriter_

---
 Modules/main.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 102 insertions(+), 4 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 8959ee297f7e4f..c00e3f06f55a35 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -234,9 +234,105 @@ static PyObject*
 _utf_8_bytes_dedent(PyObject *bytes){
     char *input_data;
     Py_ssize_t nchars;
+    bool curr_reading_whitespace = true;
+    int curr_num_leading_spaces = 0;
+    int curr_num_leading_tabs = 0;
+    int num_common_leading_chars;
+    char c;
 
     PyBytes_AsStringAndSize(bytes, &input_data, &nchars);
 
+    int num_common_leading_spaces = nchars + 1;
+    int num_common_leading_tabs = nchars + 1;
+
+    char *data_iter;
+
+    data_iter = input_data;
+    while ( (c = *data_iter++) ){
+        if (c == '\n') {
+            // Finished reading the line
+            if (!curr_reading_whitespace) {
+                // If the line had some non-whitespace characters
+                // update the current common leading tab/space count
+                if (curr_num_leading_spaces < num_common_leading_spaces) {
+                    num_common_leading_spaces = curr_num_leading_spaces;
+                }
+                if (curr_num_leading_tabs < num_common_leading_tabs) {
+                    num_common_leading_tabs = curr_num_leading_tabs;
+                }
+            }
+            // About to start reading a new line
+            curr_reading_whitespace = true;
+            curr_num_leading_spaces = 0;
+            curr_num_leading_tabs = 0;
+        }
+        else if (curr_reading_whitespace){
+            if (c == ' ' && curr_num_leading_tabs == 0) {
+                curr_num_leading_spaces++;
+            }
+            else if (c == '\t' && curr_num_leading_spaces == 0) {
+                curr_num_leading_tabs++;
+            }
+            else {
+                // Encountered a non-whitespace character
+                curr_reading_whitespace = false;
+            }
+        }
+    }
+    if (num_common_leading_spaces > num_common_leading_tabs){
+        num_common_leading_chars = num_common_leading_spaces;
+    }
+    else {
+        num_common_leading_chars = num_common_leading_tabs;
+    }
+
+    if (num_common_leading_chars > 0) {
+        // We need to trigger a dedent
+        char *new_data;
+        char *curr_line_ptr = input_data;
+        char *next_line_ptr;
+        char *new_start_loc;
+        Py_ssize_t new_line_len;
+        _PyBytesWriter writer;
+        _PyBytesWriter_Init(&writer);
+        new_data = _PyBytesWriter_Alloc(&writer, nchars);
+        if (new_data == NULL) {
+            return NULL;
+        }
+
+        data_iter = input_data;
+        c = *data_iter;
+        while (c) {
+            // Find the end of the current line.
+            while ( (c = *data_iter++) != '\n' ){
+                if (c == NULL) {
+                    break;
+                }
+            }
+            next_line_ptr = data_iter;
+            Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
+            if (line_len > num_common_leading_chars){
+                new_start_loc = curr_line_ptr + num_common_leading_chars;
+                new_line_len = line_len - num_common_leading_chars;
+            }
+            else {
+                new_start_loc = curr_line_ptr;
+                new_line_len = line_len;
+            }
+            // Copy this line over to the new buffer (removing common
+            // leading chars)
+            new_data = _PyBytesWriter_WriteBytes(&writer, new_data, new_start_loc, new_line_len);
+            curr_line_ptr = next_line_ptr;
+        }
+        PyObject *new_bytes = _PyBytesWriter_Finish(&writer, new_data);
+        return new_bytes;
+    }
+    else {
+        // No leading chars, no work to be done.
+        return bytes;
+    }
+
+#if 0
     // Allocate new data for the output as a copy of the input
     PyBytesObject *new_bytes = PyBytes_FromStringAndSize(input_data, nchars);
     if (new_bytes == NULL) {
@@ -245,8 +341,6 @@ _utf_8_bytes_dedent(PyObject *bytes){
     char *new_data = PyBytes_AsString(new_bytes);
 
     // Step 1: Find N = the common number leading whitespace chars
-
-    // so we can use the descructive strtok to tokenize the input.
     Py_ssize_t num_common_leading_spaces = nchars + 1;
 
     // Count the number of leading spaces on each line
@@ -309,8 +403,9 @@ _utf_8_bytes_dedent(PyObject *bytes){
     }
     // null terminate the string (is this sufficient?)
     (*curr_dst) = NULL;
-
     return new_bytes;
+#endif
+
 }
 
 
@@ -341,7 +436,10 @@ pymain_run_command(wchar_t *command)
         if (new_bytes == NULL) {
             goto error;
         }
-        Py_DECREF(bytes);
+        if (new_bytes != bytes) {
+            // dedent allocated new bytes, replace the old with the new
+            Py_DECREF(bytes);
+        }
         bytes = new_bytes;
     }
 

From 674f1e07d4f3ba7ba5702984017627af58e5e5b6 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 21:42:21 +0800
Subject: [PATCH 14/42] reimplement it to imitate `textwrap.dedent`

---
 Modules/main.c | 276 +++++++++++++++++++++----------------------------
 1 file changed, 115 insertions(+), 161 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index c00e3f06f55a35..c3c4f1b7ce285c 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -228,187 +228,149 @@ pymain_import_readline(const PyConfig *config)
     }
 }
 
+/* Strip common leading whitespace, just as textwrap.dedent.
+   It stoles 1 reference from bytes if succeeded, else it will return NULL. */
+static PyObject *dedent_utf8_bytes(PyObject *bytes) {
+    if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
+        return NULL;
+    }
 
-/* Strip common leading whitespace utf encoded string */
-static PyObject*
-_utf_8_bytes_dedent(PyObject *bytes){
-    char *input_data;
+    char *start;
     Py_ssize_t nchars;
-    bool curr_reading_whitespace = true;
-    int curr_num_leading_spaces = 0;
-    int curr_num_leading_tabs = 0;
-    int num_common_leading_chars;
-    char c;
-
-    PyBytes_AsStringAndSize(bytes, &input_data, &nchars);
-
-    int num_common_leading_spaces = nchars + 1;
-    int num_common_leading_tabs = nchars + 1;
-
-    char *data_iter;
-
-    data_iter = input_data;
-    while ( (c = *data_iter++) ){
-        if (c == '\n') {
-            // Finished reading the line
-            if (!curr_reading_whitespace) {
-                // If the line had some non-whitespace characters
-                // update the current common leading tab/space count
-                if (curr_num_leading_spaces < num_common_leading_spaces) {
-                    num_common_leading_spaces = curr_num_leading_spaces;
-                }
-                if (curr_num_leading_tabs < num_common_leading_tabs) {
-                    num_common_leading_tabs = curr_num_leading_tabs;
+
+    if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
+        return NULL;
+    }
+
+    char *end = start + nchars;
+    assert(start < end);
+
+    char *candidate_start = NULL;
+    Py_ssize_t candidate_len = 0;
+
+    for (char *iter = start; iter < end; ++iter) {
+        char *line_start = iter;
+        char *leading_whitespace_end = NULL;
+
+        // scan the whole line
+        char c = 0;
+        while (iter < end && (c = *iter) != '\n') {
+            if (!leading_whitespace_end && c != ' ' && c != '\t') {
+                if (iter == line_start) {
+                    // some line has no indent, fast exit!
+                    return bytes;
                 }
+                leading_whitespace_end = iter;
             }
-            // About to start reading a new line
-            curr_reading_whitespace = true;
-            curr_num_leading_spaces = 0;
-            curr_num_leading_tabs = 0;
-        }
-        else if (curr_reading_whitespace){
-            if (c == ' ' && curr_num_leading_tabs == 0) {
-                curr_num_leading_spaces++;
-            }
-            else if (c == '\t' && curr_num_leading_spaces == 0) {
-                curr_num_leading_tabs++;
-            }
-            else {
-                // Encountered a non-whitespace character
-                curr_reading_whitespace = false;
-            }
+            ++iter;
         }
-    }
-    if (num_common_leading_spaces > num_common_leading_tabs){
-        num_common_leading_chars = num_common_leading_spaces;
-    }
-    else {
-        num_common_leading_chars = num_common_leading_tabs;
-    }
-
-    if (num_common_leading_chars > 0) {
-        // We need to trigger a dedent
-        char *new_data;
-        char *curr_line_ptr = input_data;
-        char *next_line_ptr;
-        char *new_start_loc;
-        Py_ssize_t new_line_len;
-        _PyBytesWriter writer;
-        _PyBytesWriter_Init(&writer);
-        new_data = _PyBytesWriter_Alloc(&writer, nchars);
-        if (new_data == NULL) {
-            return NULL;
+
+        // we reach the end of a line
+
+        // if this line has all white space, skip it
+        if (!leading_whitespace_end) {
+            continue;
         }
 
-        data_iter = input_data;
-        c = *data_iter;
-        while (c) {
-            // Find the end of the current line.
-            while ( (c = *data_iter++) != '\n' ){
-                if (c == NULL) {
+        if (!candidate_start) {
+            candidate_start = line_start;
+            candidate_len = leading_whitespace_end - line_start;
+            assert(candidate_len > 0);
+        } else {
+            /* We then compare with the current longest leading whitespace.
+
+               [line_start, leading_whitespace_end) is the leading whitespace of
+               this line,
+
+               [candidate_start, candidate_start + candidate_len)
+               is the leading whitespace of the current longest leading
+               whitespace. */
+            Py_ssize_t new_candidate_len = 0;
+
+            for (char *candidate_iter = candidate_start,
+                      *line_iter = line_start;
+                 candidate_iter < candidate_start + candidate_len &&
+                 line_iter < leading_whitespace_end;
+                 ++candidate_iter, ++line_iter) {
+                if (*candidate_iter != *line_iter) {
                     break;
                 }
+                ++new_candidate_len;
             }
-            next_line_ptr = data_iter;
-            Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
-            if (line_len > num_common_leading_chars){
-                new_start_loc = curr_line_ptr + num_common_leading_chars;
-                new_line_len = line_len - num_common_leading_chars;
-            }
-            else {
-                new_start_loc = curr_line_ptr;
-                new_line_len = line_len;
+
+            candidate_len = new_candidate_len;
+            if (candidate_len == 0) {
+                return bytes;
             }
-            // Copy this line over to the new buffer (removing common
-            // leading chars)
-            new_data = _PyBytesWriter_WriteBytes(&writer, new_data, new_start_loc, new_line_len);
-            curr_line_ptr = next_line_ptr;
         }
-        PyObject *new_bytes = _PyBytesWriter_Finish(&writer, new_data);
-        return new_bytes;
-    }
-    else {
-        // No leading chars, no work to be done.
-        return bytes;
-    }
-
-#if 0
-    // Allocate new data for the output as a copy of the input
-    PyBytesObject *new_bytes = PyBytes_FromStringAndSize(input_data, nchars);
-    if (new_bytes == NULL) {
-        return NULL;
     }
-    char *new_data = PyBytes_AsString(new_bytes);
 
-    // Step 1: Find N = the common number leading whitespace chars
-    Py_ssize_t num_common_leading_spaces = nchars + 1;
+    assert(candidate_len > 0);
 
-    // Count the number of leading spaces on each line
-    // Use the output array as a temporary buffer (we will repopulate it later)
-    char *line = strtok(new_data, "\n");
-    while (line) {
-        // Move the pointer up to the first non-space character
-        char *first_nonspace = line;
-        while (*first_nonspace == ' '){
-            first_nonspace++;
-        }
-        // Only check lines that contain non-whitespace characters
-        if (*first_nonspace != '\0') {
+    // trigger a dedent
+    char *p;
+    PyObject *new_bytes;
+    char *line_start;
+    Py_ssize_t new_line_len;
+    bool in_leading_space;
+    _PyBytesWriter writer;
 
-            Py_ssize_t num_leading_spaces = first_nonspace - line;
-            if (num_leading_spaces < num_common_leading_spaces) {
-                num_common_leading_spaces = num_leading_spaces;
-            }
-        }
-        line = strtok(NULL, "\n");
+    _PyBytesWriter_Init(&writer);
+    p = _PyBytesWriter_Alloc(&writer, nchars);
+    if (p == NULL) {
+        goto error;
     }
 
-    char *end_ptr = input_data + nchars;
-    char *curr_line_ptr = input_data;
-    char *next_line_ptr;
-    char *new_start_loc;
-    Py_ssize_t new_line_len;
+    for (char *iter = start; iter < end; ++iter) {
+        line_start = iter;
 
-    // Step 2: Remove N leading whitespace chars from each line by copying data
-    // (except leading spaces) from the input buffer to the output buffer one
-    // line at a time.
-
-    char *curr_dst = new_data;
-    while (curr_line_ptr < end_ptr) {
-        // Find the end of the current line.
-        next_line_ptr = strstr(curr_line_ptr, "\n");
-        if (next_line_ptr == NULL) {
-            next_line_ptr = end_ptr;
-        }
-        else {
-            next_line_ptr++;
+        // iterate over a line
+        while (iter < end && *iter != '\n') {
+            if (in_leading_space && *iter != ' ' && *iter != '\t') {
+                in_leading_space = false;
+            }
+            ++iter;
         }
 
-        Py_ssize_t line_len = next_line_ptr - curr_line_ptr;
+        // invariant: *iter == '\n' or iter == end
 
-        if (line_len > num_common_leading_spaces){
-            new_start_loc = curr_line_ptr + num_common_leading_spaces;
-            new_line_len = line_len - num_common_leading_spaces;
+        // if this line has all white space, write '\n'
+        if (in_leading_space) {
+            p = _PyBytesWriter_Prepare(&writer, p, 1);
+            if (p == NULL) {
+                goto error;
+            }
+            *p++ = '\n';
+            continue;
         }
-        else {
-            new_start_loc = curr_line_ptr;
-            new_line_len = line_len;
+
+        // copy [new_line_start + candidate_len, iter) to buffer, then append
+        // '\n'
+        new_line_len = iter - line_start - candidate_len;
+        assert(new_line_len >= 0);
+        p = _PyBytesWriter_Prepare(&writer, p, new_line_len + 1);
+        if (p == NULL) {
+            goto error;
         }
+        memcpy(p, line_start + candidate_len, new_line_len);
 
-        // Copy the part of the line we want to keep to the new location
-        strncpy(curr_dst, new_start_loc, new_line_len);
-        curr_dst += new_line_len;
+        p += new_line_len;
 
-        curr_line_ptr = next_line_ptr;
+        // this may always append '\n' at the end of the input
+        *p++ = '\n';
+    }
+
+    new_bytes = _PyBytesWriter_Finish(&writer, p);
+    if (new_bytes == NULL) {
+        goto error;
     }
-    // null terminate the string (is this sufficient?)
-    (*curr_dst) = NULL;
     return new_bytes;
-#endif
 
+error:
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
 }
 
-
 static int
 pymain_run_command(wchar_t *command)
 {
@@ -430,17 +392,9 @@ pymain_run_command(wchar_t *command)
         goto error;
     }
 
-    // Only perform auto-dedent if the string starts with a newline
-    if (*PyBytes_AsString(bytes) == '\n') {
-        PyObject *new_bytes = _utf_8_bytes_dedent(bytes);
-        if (new_bytes == NULL) {
-            goto error;
-        }
-        if (new_bytes != bytes) {
-            // dedent allocated new bytes, replace the old with the new
-            Py_DECREF(bytes);
-        }
-        bytes = new_bytes;
+    bytes = dedent_utf8_bytes(bytes);
+    if (bytes == NULL) {
+        goto error;
     }
 
     PyCompilerFlags cf = _PyCompilerFlags_INIT;

From 05d41692a664c18c11e31087e35ff6a484b11b7f Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 22:15:15 +0800
Subject: [PATCH 15/42] fix missing initialization

---
 Modules/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/main.c b/Modules/main.c
index c3c4f1b7ce285c..0d7032f5e63164 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -323,6 +323,7 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
 
     for (char *iter = start; iter < end; ++iter) {
         line_start = iter;
+        in_leading_space = true;
 
         // iterate over a line
         while (iter < end && *iter != '\n') {

From 9d53c4ef2b390c3e5a3c872cbc98f171dd0b5280 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 22:20:57 +0800
Subject: [PATCH 16/42] fix ref leak

---
 Modules/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Modules/main.c b/Modules/main.c
index 0d7032f5e63164..2b7fa7a97da775 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -357,7 +357,7 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
 
         p += new_line_len;
 
-        // this may always append '\n' at the end of the input
+        // this may always append '\n' at the end of `new_bytes`
         *p++ = '\n';
     }
 
@@ -365,6 +365,7 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
     if (new_bytes == NULL) {
         goto error;
     }
+    Py_DECREF(bytes);
     return new_bytes;
 
 error:

From 689a13a4624b7b655b61b9271a1985ff475d51c5 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 22:25:10 +0800
Subject: [PATCH 17/42] fix empty string

---
 Modules/main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Modules/main.c b/Modules/main.c
index 2b7fa7a97da775..5758b2f702391d 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -9,6 +9,7 @@
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 
 /* Includes for exit_sigint() */
+#include <assert.h>
 #include <stdio.h>                // perror()
 #ifdef HAVE_SIGNAL_H
 #  include <signal.h>             // SIGINT
@@ -305,6 +306,9 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
         }
     }
 
+    if (candidate_len == 0) {
+        return bytes;
+    }
     assert(candidate_len > 0);
 
     // trigger a dedent

From f0ac7ea9fc5b365602fb07e8159216b2609358fd Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 22:29:16 +0800
Subject: [PATCH 18/42] nit: remove unnecessary variable

---
 Modules/main.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 5758b2f702391d..4267deefe6c60d 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -254,9 +254,8 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
         char *leading_whitespace_end = NULL;
 
         // scan the whole line
-        char c = 0;
-        while (iter < end && (c = *iter) != '\n') {
-            if (!leading_whitespace_end && c != ' ' && c != '\t') {
+        while (iter < end && *iter != '\n') {
+            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
                 if (iter == line_start) {
                     // some line has no indent, fast exit!
                     return bytes;
@@ -266,8 +265,6 @@ static PyObject *dedent_utf8_bytes(PyObject *bytes) {
             ++iter;
         }
 
-        // we reach the end of a line
-
         // if this line has all white space, skip it
         if (!leading_whitespace_end) {
             continue;

From 71cad010de23e64bf7a04d0b732a8a06bfd92870 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 1 May 2023 22:29:54 +0800
Subject: [PATCH 19/42] remove unnecessary include

---
 Modules/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Modules/main.c b/Modules/main.c
index 4267deefe6c60d..75954fca0a2fca 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -9,7 +9,6 @@
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 
 /* Includes for exit_sigint() */
-#include <assert.h>
 #include <stdio.h>                // perror()
 #ifdef HAVE_SIGNAL_H
 #  include <signal.h>             // SIGINT

From 4549de811225ca846e5fb8928750cb31e6fe5c5f Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Mon, 1 May 2023 12:44:48 -0400
Subject: [PATCH 20/42] Add test cases

---
 Lib/test/test_cmd_line.py | 69 +++++++++++++++++++++++++++++++++++++++
 Modules/main.c            |  2 +-
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 94298003063593..f9310a598c674a 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -901,6 +901,75 @@ def res2int(res):
         )
         self.assertEqual(res2int(res), (6000, 6000))
 
+    def test_cmd_dedent(self):
+        # test that -c auto-dedents its arguments
+        from textwrap import dedent
+        test_cases = [
+            {
+                'code': '''
+                    print('space-auto-dedent')
+                ''',
+                'expected': b'space-auto-dedent',
+            },
+            {
+                'code': dedent('''
+                ^^^print('tab-auto-dedent')
+                ''').replace('^', '\t'),
+                'expected': b'tab-auto-dedent',
+            },
+            {
+                'code': dedent('''
+                ^^if 1:
+                ^^^^print('mixed-auto-dedent-1')
+                ^^print('mixed-auto-dedent-2')
+                ''').replace('^', '\t \t'),
+                'expected': b'mixed-auto-dedent-1\nmixed-auto-dedent-2',
+            },
+            {
+                'code': '''
+                    data = """$
+
+                    this data has an empty newline above and a newline with spaces below $
+                                            $
+                    """$
+                    if 1:         $
+                        print(repr(data))$
+                '''.replace('$', ''),
+                # Note: entirely blank lines are normalized to \n, even if they
+                # are part of a data string. This is consistent with
+                # textwrap.dedent behavior, but might not be intuitive.
+                'expected': b"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
+            },
+        ]
+        for case in test_cases:
+            # Run the auto-dedent case
+            args1 = sys.executable, '-c', case['code']
+            proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
+            self.assertEqual(proc1.returncode, 0, proc1)
+            output1 = proc1.stdout.strip()
+
+            # Manually dedent beforehand, check the result is the same.
+            args2 = sys.executable, '-c', dedent(case['code'])
+            proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
+            self.assertEqual(proc2.returncode, 0, proc2)
+            output2 = proc2.stdout.strip()
+
+            self.assertEqual(output1, output2)
+            self.assertEqual(output1, case['expected'])
+
+    def test_cmd_dedent_failcase(self):
+        # Mixing tabs and spaces is not allowed
+        from textwrap import dedent
+        template = dedent(
+            '''
+            -+if 1:
+            +-++ print('will fail')
+            ''')
+        code = template.replace('-', ' ').replace('+', '\t')
+        assert_python_failure('-c', code)
+        code = template.replace('-', '\t').replace('+', ' ')
+        assert_python_failure('-c', code)
+
 
 @unittest.skipIf(interpreter_requires_environment(),
                  'Cannot run -I tests when PYTHON env vars are required.')
diff --git a/Modules/main.c b/Modules/main.c
index 75954fca0a2fca..bfcdd3d5f8c07b 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -229,7 +229,7 @@ pymain_import_readline(const PyConfig *config)
 }
 
 /* Strip common leading whitespace, just as textwrap.dedent.
-   It stoles 1 reference from bytes if succeeded, else it will return NULL. */
+   It steals 1 reference from bytes if succeeded, else it will return NULL. */
 static PyObject *dedent_utf8_bytes(PyObject *bytes) {
     if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
         return NULL;

From 0c3b90b12ed9b9e8e3901b6ead172b8343742bf9 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Mon, 1 May 2023 13:46:03 -0400
Subject: [PATCH 21/42] Fix test on Windows

---
 Lib/test/test_cmd_line.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index f9310a598c674a..f518b7407dfdaf 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -955,7 +955,7 @@ def test_cmd_dedent(self):
             output2 = proc2.stdout.strip()
 
             self.assertEqual(output1, output2)
-            self.assertEqual(output1, case['expected'])
+            self.assertEqual(output1.replace(b'\r', b''), case['expected'])
 
     def test_cmd_dedent_failcase(self):
         # Mixing tabs and spaces is not allowed

From 1f5b7463abe2a169b896db2e8086a9c498001ec8 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Mon, 1 May 2023 19:44:37 -0400
Subject: [PATCH 22/42] Normalize Windows line endings

---
 Lib/test/test_cmd_line.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index f518b7407dfdaf..89f7a299fb901c 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -955,7 +955,7 @@ def test_cmd_dedent(self):
             output2 = proc2.stdout.strip()
 
             self.assertEqual(output1, output2)
-            self.assertEqual(output1.replace(b'\r', b''), case['expected'])
+            self.assertEqual(output1.replace(b'\r\n', b'\n'), case['expected'])
 
     def test_cmd_dedent_failcase(self):
         # Mixing tabs and spaces is not allowed

From 1f17e23c3d4af2c082beebd1b855e1bc262e4d71 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Thu, 4 May 2023 02:32:42 +0800
Subject: [PATCH 23/42] Update Modules/main.c

Co-authored-by: Kirill Podoprigora <80244920+Eclips4@users.noreply.github.com>
---
 Modules/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Modules/main.c b/Modules/main.c
index bfcdd3d5f8c07b..b016783f69beb1 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -230,7 +230,9 @@ pymain_import_readline(const PyConfig *config)
 
 /* Strip common leading whitespace, just as textwrap.dedent.
    It steals 1 reference from bytes if succeeded, else it will return NULL. */
-static PyObject *dedent_utf8_bytes(PyObject *bytes) {
+static PyObject *
+dedent_utf8_bytes(PyObject *bytes)
+{
     if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
         return NULL;
     }

From c84616c1dacaebd8f75b2eb4c912860b43947d87 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 11:39:28 +0800
Subject: [PATCH 24/42] Refactor: move dedent logic into _PyBytes_Dedent

---
 Include/internal/pycore_bytesobject.h         |  25 +++
 ...-04-29-23-15-38.gh-issue-103997.BS3uVt.rst |   8 +-
 Modules/main.c                                | 148 +++---------------
 Objects/bytesobject.c                         | 147 +++++++++++++++++
 4 files changed, 197 insertions(+), 131 deletions(-)

diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index 115c0c52c8f9a9..f1167c1de52233 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -122,6 +122,31 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
     const void *bytes,
     Py_ssize_t size);
 
+
+/** Dedent a UTF-8 encoded string.
+ * behavior is expected to match `textwrap.dedent`
+ *
+ * return value:
+ * 0, no need to dedent, writer untouched
+ * 1, success
+ * -1, failure
+ *
+ * str is the beginning of the string to dedent.
+ * expecting (str != NULL)
+ *
+ * len is the length of the string to dedent.
+ * expecting (len >= 0)
+ * 
+ * writer is a _PyBytesWriter object to write the dedented string.
+ * expecting (writer != NULL)
+ *
+ * p points to a char* indicating the current position in the _PyBytesWriter.
+ * It is updated to the new position after writing the dedented string on exit.
+ * expecting (p != NULL && *p != NULL)
+ */
+PyAPI_FUNC(int)
+_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
+                char **p);
 #ifdef __cplusplus
 }
 #endif
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
index 8949f435731e34..c02978d024bcc8 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
@@ -1 +1,7 @@
-Strings passed to  "-c" are now automatically dedented (common leading whitespace is removed). This allows "python -c" invocations to be indented in shell scripts without causing indentation errors.
+String arguments passed to  "-c" are now automatically dedented as if by
+:func:`textwrap.dedent`. This allows "python -c" invocations to be indented
+ in shell scripts without causing indentation errors.
+
+Add a private API :c:func:`_PyBytes_Dedent`.
+
+(Patch by Jon Crall and Steven Sun)
diff --git a/Modules/main.c b/Modules/main.c
index 0cd99628cdf64d..cd9a5b84f9a0b6 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -7,6 +7,7 @@
 #include "pycore_pathconfig.h"    // _PyPathConfig_ComputeSysPath0()
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
+#include "pycore_bytesobject.h"   // _PyBytesWriter, _PyBytes_Dedent()
 
 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()
@@ -229,150 +230,35 @@ pymain_import_readline(const PyConfig *config)
 }
 
 /* Strip common leading whitespace, just as textwrap.dedent.
-   It steals 1 reference from bytes if succeeded, else it will return NULL. */
+   It returns a new reference. */
 static PyObject *
 dedent_utf8_bytes(PyObject *bytes)
 {
-    if (bytes == NULL || !PyBytes_CheckExact(bytes)) {
-        return NULL;
-    }
+    assert(bytes == NULL || !PyBytes_CheckExact(bytes->ob_type));
 
-    char *start;
     Py_ssize_t nchars;
-
+    char *start;
     if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
         return NULL;
     }
 
-    char *end = start + nchars;
-    assert(start < end);
-
-    char *candidate_start = NULL;
-    Py_ssize_t candidate_len = 0;
-
-    for (char *iter = start; iter < end; ++iter) {
-        char *line_start = iter;
-        char *leading_whitespace_end = NULL;
-
-        // scan the whole line
-        while (iter < end && *iter != '\n') {
-            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
-                if (iter == line_start) {
-                    // some line has no indent, fast exit!
-                    return bytes;
-                }
-                leading_whitespace_end = iter;
-            }
-            ++iter;
-        }
-
-        // if this line has all white space, skip it
-        if (!leading_whitespace_end) {
-            continue;
-        }
-
-        if (!candidate_start) {
-            candidate_start = line_start;
-            candidate_len = leading_whitespace_end - line_start;
-            assert(candidate_len > 0);
-        } else {
-            /* We then compare with the current longest leading whitespace.
-
-               [line_start, leading_whitespace_end) is the leading whitespace of
-               this line,
-
-               [candidate_start, candidate_start + candidate_len)
-               is the leading whitespace of the current longest leading
-               whitespace. */
-            Py_ssize_t new_candidate_len = 0;
-
-            for (char *candidate_iter = candidate_start,
-                      *line_iter = line_start;
-                 candidate_iter < candidate_start + candidate_len &&
-                 line_iter < leading_whitespace_end;
-                 ++candidate_iter, ++line_iter) {
-                if (*candidate_iter != *line_iter) {
-                    break;
-                }
-                ++new_candidate_len;
-            }
-
-            candidate_len = new_candidate_len;
-            if (candidate_len == 0) {
-                return bytes;
-            }
-        }
-    }
-
-    if (candidate_len == 0) {
-        return bytes;
-    }
-    assert(candidate_len > 0);
-
-    // trigger a dedent
-    char *p;
-    PyObject *new_bytes;
-    char *line_start;
-    Py_ssize_t new_line_len;
-    bool in_leading_space;
     _PyBytesWriter writer;
-
     _PyBytesWriter_Init(&writer);
-    p = _PyBytesWriter_Alloc(&writer, nchars);
+    char *p = _PyBytesWriter_Alloc(&writer, nchars);
     if (p == NULL) {
-        goto error;
+        return NULL;
     }
 
-    for (char *iter = start; iter < end; ++iter) {
-        line_start = iter;
-        in_leading_space = true;
-
-        // iterate over a line
-        while (iter < end && *iter != '\n') {
-            if (in_leading_space && *iter != ' ' && *iter != '\t') {
-                in_leading_space = false;
-            }
-            ++iter;
-        }
-
-        // invariant: *iter == '\n' or iter == end
-
-        // if this line has all white space, write '\n'
-        if (in_leading_space) {
-            p = _PyBytesWriter_Prepare(&writer, p, 1);
-            if (p == NULL) {
-                goto error;
-            }
-            *p++ = '\n';
-            continue;
-        }
-
-        // copy [new_line_start + candidate_len, iter) to buffer, then append
-        // '\n'
-        new_line_len = iter - line_start - candidate_len;
-        assert(new_line_len >= 0);
-        p = _PyBytesWriter_Prepare(&writer, p, new_line_len + 1);
-        if (p == NULL) {
-            goto error;
-        }
-        memcpy(p, line_start + candidate_len, new_line_len);
-
-        p += new_line_len;
-
-        // this may always append '\n' at the end of `new_bytes`
-        *p++ = '\n';
+    int ret = _PyBytes_Dedent(start, nchars, &writer, &p);
+    if (ret < 0) {
+        return NULL;
     }
-
-    new_bytes = _PyBytesWriter_Finish(&writer, p);
-    if (new_bytes == NULL) {
-        goto error;
+    if (ret == 0) {
+        Py_INCREF(bytes);
+        _PyBytesWriter_Dealloc(&writer);
+        return bytes;
     }
-    Py_DECREF(bytes);
-    return new_bytes;
-
-error:
-    _PyBytesWriter_Dealloc(&writer);
-    return NULL;
+    return _PyBytesWriter_Finish(&writer, p);
 }
 
 static int
@@ -396,10 +282,12 @@ pymain_run_command(wchar_t *command)
         goto error;
     }
 
-    bytes = dedent_utf8_bytes(bytes);
-    if (bytes == NULL) {
+    PyObject *new_bytes = dedent_utf8_bytes(bytes);
+    if (new_bytes == NULL) {
+        Py_DECREF(bytes);
         goto error;
     }
+    Py_SETREF(bytes, new_bytes);
 
     PyCompilerFlags cf = _PyCompilerFlags_INIT;
     cf.cf_flags |= PyCF_IGNORE_COOKIE;
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 6b9231a9fa7693..8c82c6e1ab1dbe 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3535,6 +3535,8 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
 }
 
 
+/* Algorithms on bytes */
+
 void
 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
     const char* src, Py_ssize_t len_src)
@@ -3558,3 +3560,148 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
     }
 }
 
+/** Dedent a UTF-8 encoded string.
+ * behavior is expected to match `textwrap.dedent`
+ *
+ * return value:
+ * 0, no need to dedent, writer untouched
+ * 1, success
+ * -1, failure
+ *
+ * str is the beginning of the string to dedent.
+ * expecting (str != NULL)
+ *
+ * len is the length of the string to dedent.
+ * expecting (len >= 0)
+ * 
+ * writer is a _PyBytesWriter object to write the dedented string.
+ * expecting (writer != NULL)
+ *
+ * p points to a char* indicating the current position in the _PyBytesWriter.
+ * It is updated to the new position after writing the dedented string on exit.
+ * expecting (p != NULL && *p != NULL)
+ */
+int
+_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
+                char **p)
+{   
+    assert(str);
+    assert(p != NULL && *p != NULL);
+    assert(writer);
+
+    if (len <= 0)
+        return 0;
+
+    const char *end = str + len;
+    assert(str < end); // prevent overflow when len is too large
+
+    const char *candidate_start = NULL;
+    Py_ssize_t candidate_len = 0;
+
+    for (const char *iter = str; iter < end; ++iter) {
+        const char *line_start = iter;
+        const char *leading_whitespace_end = NULL;
+
+        // scan the whole line
+        while (iter < end && *iter != '\n') {
+            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
+                if (iter == line_start) {
+                    // some line has no indent, fast exit!
+                    return 0;
+                }
+                leading_whitespace_end = iter;
+            }
+            ++iter;
+        }
+
+        // if this line has all white space, skip it
+        if (!leading_whitespace_end) {
+            continue;
+        }
+
+        if (!candidate_start) {
+            candidate_start = line_start;
+            candidate_len = leading_whitespace_end - line_start;
+            assert(candidate_len > 0);
+        } else {
+            /* We then compare with the current longest leading whitespace.
+
+               [line_start, leading_whitespace_end) is the leading whitespace of
+               this line,
+
+               [candidate_start, candidate_start + candidate_len)
+               is the leading whitespace of the current longest leading
+               whitespace. */
+            Py_ssize_t new_candidate_len = 0;
+
+            for (const char *candidate_iter = candidate_start,
+                            *line_iter = line_start;
+                 candidate_iter < candidate_start + candidate_len &&
+                 line_iter < leading_whitespace_end;
+                 ++candidate_iter, ++line_iter) {
+                if (*candidate_iter != *line_iter) {
+                    break;
+                }
+                ++new_candidate_len;
+            }
+
+            candidate_len = new_candidate_len;
+            if (candidate_len == 0) {
+                return 0;
+            }
+        }
+    }
+
+    assert(candidate_len >= 0);
+    if (candidate_len == 0) {
+        return 0;
+    }
+
+    // trigger a dedent
+
+    // prepare the writer
+    char *p_ = _PyBytesWriter_Prepare(writer, *p, len);
+    if (p_ == NULL) {
+        *p = NULL;
+        return -1;
+    }
+
+    for (const char *iter = str; iter < end; ++iter) {
+        const char *line_start = iter;
+        bool in_leading_space = true;
+
+        // iterate over a line to find the end of a line
+        while (iter < end && *iter != '\n') {
+            if (in_leading_space && *iter != ' ' && *iter != '\t') {
+                in_leading_space = false;
+            }
+            ++iter;
+        }
+
+        // invariant: *iter == '\n' or iter == end
+        bool append_newline = iter < end;
+
+        // if this line has all white space, write '\n'
+        if (in_leading_space && append_newline) {
+            *p_++ = '\n';
+            continue;
+        }
+
+        /* copy [new_line_start + candidate_len, iter) to buffer, then
+            conditionally append '\n' */
+
+        Py_ssize_t new_line_len = iter - line_start - candidate_len;
+        assert(new_line_len >= 0);
+
+        memcpy(p_, line_start + candidate_len, new_line_len);
+
+        p_ += new_line_len;
+
+        if (append_newline) {
+            *p_++ = '\n';
+        }
+    }
+
+    *p = p_;
+    return 1;
+}

From a19b67564eb07767e2fc53c99cb21b09c2173e38 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 11:58:51 +0800
Subject: [PATCH 25/42] Apply suggestions from code review

---
 Include/internal/pycore_bytesobject.h | 2 +-
 Objects/bytesobject.c                 | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index f1167c1de52233..12beb1617705c3 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -136,7 +136,7 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
  *
  * len is the length of the string to dedent.
  * expecting (len >= 0)
- * 
+ *
  * writer is a _PyBytesWriter object to write the dedented string.
  * expecting (writer != NULL)
  *
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 8c82c6e1ab1dbe..7c27eda7e39056 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3573,7 +3573,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  *
  * len is the length of the string to dedent.
  * expecting (len >= 0)
- * 
+ *
  * writer is a _PyBytesWriter object to write the dedented string.
  * expecting (writer != NULL)
  *
@@ -3584,7 +3584,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
 int
 _PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
                 char **p)
-{   
+{
     assert(str);
     assert(p != NULL && *p != NULL);
     assert(writer);

From 7ce411f5fc4ddb76fc81a2d6b0e354de5546874b Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 17:26:50 +0800
Subject: [PATCH 26/42] Update Misc/NEWS.d/next/Core and
 Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst

Co-authored-by: Inada Naoki <songofacandy@gmail.com>
---
 .../2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst              | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
index c02978d024bcc8..adeb39570bcd9d 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
@@ -1,6 +1,6 @@
 String arguments passed to  "-c" are now automatically dedented as if by
 :func:`textwrap.dedent`. This allows "python -c" invocations to be indented
- in shell scripts without causing indentation errors.
+in shell scripts without causing indentation errors.
 
 Add a private API :c:func:`_PyBytes_Dedent`.
 

From dea43017537845f82da1d14538fdcaab1b65837d Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 18:20:47 +0800
Subject: [PATCH 27/42] resolve comments

---
 Include/internal/pycore_bytesobject.h | 49 +++++++++++-----------
 Modules/main.c                        | 20 ++++-----
 Objects/bytesobject.c                 | 59 +++++++++++----------------
 3 files changed, 58 insertions(+), 70 deletions(-)

diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index 12beb1617705c3..c8d034b0e64d91 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -41,6 +41,30 @@ PyAPI_FUNC(void)
 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
     const char* src, Py_ssize_t len_src);
 
+
+/** Dedent a UTF-8 encoded string.
+ * behavior is expected to match `textwrap.dedent`
+ *
+ * return value:
+ * 0, no need to dedent, `out_len` untouched
+ * 1, success
+ *
+ * `src` is the string to dedent.
+ * expecting `(src != NULL)`
+ *
+ * `src_len` is the length of `src`.
+ *
+ * `out` is a buffer for the result.
+ * expecting `(out != NULL)`
+ *
+ * `out_len` points to the length of `out`, and is updated to the length of the
+ * result upon success. Output buffer should be large enough to hold the result.
+ * expecting `(out_len != NULL && *out_len >= src_len)`
+ */
+PyAPI_FUNC(int)
+_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char* out,
+                Py_ssize_t* out_len);
+
 /* --- _PyBytesWriter ----------------------------------------------------- */
 
 /* The _PyBytesWriter structure is big: it contains an embedded "stack buffer".
@@ -122,31 +146,6 @@ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer,
     const void *bytes,
     Py_ssize_t size);
 
-
-/** Dedent a UTF-8 encoded string.
- * behavior is expected to match `textwrap.dedent`
- *
- * return value:
- * 0, no need to dedent, writer untouched
- * 1, success
- * -1, failure
- *
- * str is the beginning of the string to dedent.
- * expecting (str != NULL)
- *
- * len is the length of the string to dedent.
- * expecting (len >= 0)
- *
- * writer is a _PyBytesWriter object to write the dedented string.
- * expecting (writer != NULL)
- *
- * p points to a char* indicating the current position in the _PyBytesWriter.
- * It is updated to the new position after writing the dedented string on exit.
- * expecting (p != NULL && *p != NULL)
- */
-PyAPI_FUNC(int)
-_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
-                char **p);
 #ifdef __cplusplus
 }
 #endif
diff --git a/Modules/main.c b/Modules/main.c
index cd9a5b84f9a0b6..6853858b01ef74 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -234,7 +234,7 @@ pymain_import_readline(const PyConfig *config)
 static PyObject *
 dedent_utf8_bytes(PyObject *bytes)
 {
-    assert(bytes == NULL || !PyBytes_CheckExact(bytes->ob_type));
+    assert(bytes != NULL && PyBytes_CheckExact(bytes));
 
     Py_ssize_t nchars;
     char *start;
@@ -242,23 +242,23 @@ dedent_utf8_bytes(PyObject *bytes)
         return NULL;
     }
 
-    _PyBytesWriter writer;
-    _PyBytesWriter_Init(&writer);
-    char *p = _PyBytesWriter_Alloc(&writer, nchars);
+    char* p = PyMem_Malloc(nchars);
     if (p == NULL) {
+        PyErr_NoMemory();
         return NULL;
     }
 
-    int ret = _PyBytes_Dedent(start, nchars, &writer, &p);
-    if (ret < 0) {
-        return NULL;
-    }
+    int ret = _PyBytes_Dedent(start, nchars, p, &nchars);
+
     if (ret == 0) {
         Py_INCREF(bytes);
-        _PyBytesWriter_Dealloc(&writer);
+        PyMem_Free(p);
         return bytes;
     }
-    return _PyBytesWriter_Finish(&writer, p);
+
+    PyObject* new_bytes = PyBytes_FromStringAndSize(p, nchars);
+    PyMem_Free(p);
+    return new_bytes;
 }
 
 static int
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 7c27eda7e39056..1afae469d75211 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3564,41 +3564,37 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  * behavior is expected to match `textwrap.dedent`
  *
  * return value:
- * 0, no need to dedent, writer untouched
+ * 0, no need to dedent, `out_len` untouched
  * 1, success
- * -1, failure
  *
- * str is the beginning of the string to dedent.
- * expecting (str != NULL)
+ * `src` is the string to dedent.
+ * expecting `(src != NULL)`
  *
- * len is the length of the string to dedent.
- * expecting (len >= 0)
+ * `src_len` is the length of `src`.
  *
- * writer is a _PyBytesWriter object to write the dedented string.
- * expecting (writer != NULL)
+ * `out` is a buffer for the result.
+ * expecting `(out != NULL)`
  *
- * p points to a char* indicating the current position in the _PyBytesWriter.
- * It is updated to the new position after writing the dedented string on exit.
- * expecting (p != NULL && *p != NULL)
+ * `out_len` points to the length of `out`, and is updated to the length of the
+ * result upon success. Output buffer should be large enough to hold the result.
+ * expecting `(out_len != NULL && *out_len >= src_len)`
  */
 int
-_PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
-                char **p)
-{
-    assert(str);
-    assert(p != NULL && *p != NULL);
-    assert(writer);
+_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *out,
+                Py_ssize_t *out_len) {
+    assert(src && out && out_len);
+    assert(*out_len >= src_len);
 
-    if (len <= 0)
+    if (src_len <= 0)
         return 0;
 
-    const char *end = str + len;
-    assert(str < end); // prevent overflow when len is too large
+    const char *end = src + src_len;
+    assert(src < end); // prevent overflow when src_len is too large
 
     const char *candidate_start = NULL;
     Py_ssize_t candidate_len = 0;
 
-    for (const char *iter = str; iter < end; ++iter) {
+    for (const char *iter = src; iter < end; ++iter) {
         const char *line_start = iter;
         const char *leading_whitespace_end = NULL;
 
@@ -3658,15 +3654,9 @@ _PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
     }
 
     // trigger a dedent
+    char *out_start = out;
 
-    // prepare the writer
-    char *p_ = _PyBytesWriter_Prepare(writer, *p, len);
-    if (p_ == NULL) {
-        *p = NULL;
-        return -1;
-    }
-
-    for (const char *iter = str; iter < end; ++iter) {
+    for (const char *iter = src; iter < end; ++iter) {
         const char *line_start = iter;
         bool in_leading_space = true;
 
@@ -3683,7 +3673,7 @@ _PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
 
         // if this line has all white space, write '\n'
         if (in_leading_space && append_newline) {
-            *p_++ = '\n';
+            *out++ = '\n';
             continue;
         }
 
@@ -3693,15 +3683,14 @@ _PyBytes_Dedent(const char *str, Py_ssize_t len, _PyBytesWriter *writer,
         Py_ssize_t new_line_len = iter - line_start - candidate_len;
         assert(new_line_len >= 0);
 
-        memcpy(p_, line_start + candidate_len, new_line_len);
+        memcpy(out, line_start + candidate_len, new_line_len);
 
-        p_ += new_line_len;
+        out += new_line_len;
 
         if (append_newline) {
-            *p_++ = '\n';
+            *out++ = '\n';
         }
     }
-
-    *p = p_;
+    *out_len = out - out_start;
     return 1;
 }

From e06d40cd9f419291f31d14a563f4634b5af8bd85 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 19:46:55 +0800
Subject: [PATCH 28/42] Update Modules/main.c

---
 Modules/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Modules/main.c b/Modules/main.c
index 6853858b01ef74..913bef7921eb4d 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -7,7 +7,7 @@
 #include "pycore_pathconfig.h"    // _PyPathConfig_ComputeSysPath0()
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
-#include "pycore_bytesobject.h"   // _PyBytesWriter, _PyBytes_Dedent()
+#include "pycore_bytesobject.h"   // _PyBytes_Dedent()
 
 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()

From a40d028324c4159a8526f3e69e2cd1765952b1bc Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Sun, 23 Jul 2023 23:12:26 +0800
Subject: [PATCH 29/42] rename `out` to `dest`

---
 Include/internal/pycore_bytesobject.h | 17 +++++++-------
 Objects/bytesobject.c                 | 33 ++++++++++++++-------------
 2 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index c8d034b0e64d91..411dbcc6edc9ea 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -46,7 +46,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  * behavior is expected to match `textwrap.dedent`
  *
  * return value:
- * 0, no need to dedent, `out_len` untouched
+ * 0, no need to dedent, `dest` buffer and `*dest_len` untouched
  * 1, success
  *
  * `src` is the string to dedent.
@@ -54,16 +54,17 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  *
  * `src_len` is the length of `src`.
  *
- * `out` is a buffer for the result.
- * expecting `(out != NULL)`
+ * `dest` is a buffer for the result.
+ * expecting `(dest != NULL)`
  *
- * `out_len` points to the length of `out`, and is updated to the length of the
- * result upon success. Output buffer should be large enough to hold the result.
- * expecting `(out_len != NULL && *out_len >= src_len)`
+ * `*dest_len` stores the length of `dest` on entry, and is updated to the
+ * length of the dedent result upon success. Output buffer should be large
+ * enough to hold the result.
+ * expecting `(dest_len != NULL && *dest_len >= src_len)`
  */
 PyAPI_FUNC(int)
-_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char* out,
-                Py_ssize_t* out_len);
+_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char* dest,
+                Py_ssize_t* dest_len);
 
 /* --- _PyBytesWriter ----------------------------------------------------- */
 
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 1afae469d75211..6de0218459939c 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3564,7 +3564,7 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  * behavior is expected to match `textwrap.dedent`
  *
  * return value:
- * 0, no need to dedent, `out_len` untouched
+ * 0, no need to dedent, `dest` buffer and `*dest_len` untouched
  * 1, success
  *
  * `src` is the string to dedent.
@@ -3572,18 +3572,19 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
  *
  * `src_len` is the length of `src`.
  *
- * `out` is a buffer for the result.
- * expecting `(out != NULL)`
+ * `dest` is a buffer for the result.
+ * expecting `(dest != NULL)`
  *
- * `out_len` points to the length of `out`, and is updated to the length of the
- * result upon success. Output buffer should be large enough to hold the result.
- * expecting `(out_len != NULL && *out_len >= src_len)`
+ * `*dest_len` stores the length of `dest` on entry, and is updated to the
+ * length of the dedent result upon success. Output buffer should be large
+ * enough to hold the result.
+ * expecting `(dest_len != NULL && *dest_len >= src_len)`
  */
 int
-_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *out,
-                Py_ssize_t *out_len) {
-    assert(src && out && out_len);
-    assert(*out_len >= src_len);
+_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *dest,
+                Py_ssize_t *dest_len) {
+    assert(src && dest && dest_len);
+    assert(*dest_len >= src_len);
 
     if (src_len <= 0)
         return 0;
@@ -3654,7 +3655,7 @@ _PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *out,
     }
 
     // trigger a dedent
-    char *out_start = out;
+    char *dest_start = dest;
 
     for (const char *iter = src; iter < end; ++iter) {
         const char *line_start = iter;
@@ -3673,7 +3674,7 @@ _PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *out,
 
         // if this line has all white space, write '\n'
         if (in_leading_space && append_newline) {
-            *out++ = '\n';
+            *dest++ = '\n';
             continue;
         }
 
@@ -3683,14 +3684,14 @@ _PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *out,
         Py_ssize_t new_line_len = iter - line_start - candidate_len;
         assert(new_line_len >= 0);
 
-        memcpy(out, line_start + candidate_len, new_line_len);
+        memcpy(dest, line_start + candidate_len, new_line_len);
 
-        out += new_line_len;
+        dest += new_line_len;
 
         if (append_newline) {
-            *out++ = '\n';
+            *dest++ = '\n';
         }
     }
-    *out_len = out - out_start;
+    *dest_len = dest - dest_start;
     return 1;
 }

From 9569655476c2e6d91980e39489c7f0342c95dbab Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Mon, 24 Jul 2023 23:36:22 +0800
Subject: [PATCH 30/42] move to _PyUnicode_Dedent

---
 Include/internal/pycore_bytesobject.h         |  25 ----
 Include/internal/pycore_unicodeobject.h       |   6 +
 ...-04-29-23-15-38.gh-issue-103997.BS3uVt.rst |   9 +-
 Modules/main.c                                |  43 +-----
 Objects/bytesobject.c                         | 136 -----------------
 Objects/unicodeobject.c                       | 141 ++++++++++++++++++
 6 files changed, 156 insertions(+), 204 deletions(-)

diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
index 411dbcc6edc9ea..115c0c52c8f9a9 100644
--- a/Include/internal/pycore_bytesobject.h
+++ b/Include/internal/pycore_bytesobject.h
@@ -41,31 +41,6 @@ PyAPI_FUNC(void)
 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
     const char* src, Py_ssize_t len_src);
 
-
-/** Dedent a UTF-8 encoded string.
- * behavior is expected to match `textwrap.dedent`
- *
- * return value:
- * 0, no need to dedent, `dest` buffer and `*dest_len` untouched
- * 1, success
- *
- * `src` is the string to dedent.
- * expecting `(src != NULL)`
- *
- * `src_len` is the length of `src`.
- *
- * `dest` is a buffer for the result.
- * expecting `(dest != NULL)`
- *
- * `*dest_len` stores the length of `dest` on entry, and is updated to the
- * length of the dedent result upon success. Output buffer should be large
- * enough to hold the result.
- * expecting `(dest_len != NULL && *dest_len >= src_len)`
- */
-PyAPI_FUNC(int)
-_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char* dest,
-                Py_ssize_t* dest_len);
-
 /* --- _PyBytesWriter ----------------------------------------------------- */
 
 /* The _PyBytesWriter structure is big: it contains an embedded "stack buffer".
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index ad59c3e385f2d3..fd10c4c0cba53d 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -339,6 +339,12 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
     PyObject *thousands_sep,
     Py_UCS4 *maxchar);
 
+/* Dedent a string.
+   Behaviour is expected to be an exact match of `textwrap.dedent`.
+   Return a new reference on success, NULL with exception set on error.
+   */
+PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);
+
 /* --- Misc functions ----------------------------------------------------- */
 
 extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int);
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
index adeb39570bcd9d..2e033ba98e12b3 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
@@ -1,7 +1,6 @@
-String arguments passed to  "-c" are now automatically dedented as if by
+String arguments passed to "-c" are now automatically dedented as if by
 :func:`textwrap.dedent`. This allows "python -c" invocations to be indented
-in shell scripts without causing indentation errors.
+in shell scripts without causing indentation errors. (Patch by Jon Crall and
+Steven Sun)
 
-Add a private API :c:func:`_PyBytes_Dedent`.
-
-(Patch by Jon Crall and Steven Sun)
+Add an internal API :c:func:`_PyUnicode_Dedent`. (Patch by Steven Sun)
diff --git a/Modules/main.c b/Modules/main.c
index 913bef7921eb4d..27cdcb042584fa 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -229,37 +229,6 @@ pymain_import_readline(const PyConfig *config)
     }
 }
 
-/* Strip common leading whitespace, just as textwrap.dedent.
-   It returns a new reference. */
-static PyObject *
-dedent_utf8_bytes(PyObject *bytes)
-{
-    assert(bytes != NULL && PyBytes_CheckExact(bytes));
-
-    Py_ssize_t nchars;
-    char *start;
-    if (PyBytes_AsStringAndSize(bytes, &start, &nchars) != 0) {
-        return NULL;
-    }
-
-    char* p = PyMem_Malloc(nchars);
-    if (p == NULL) {
-        PyErr_NoMemory();
-        return NULL;
-    }
-
-    int ret = _PyBytes_Dedent(start, nchars, p, &nchars);
-
-    if (ret == 0) {
-        Py_INCREF(bytes);
-        PyMem_Free(p);
-        return bytes;
-    }
-
-    PyObject* new_bytes = PyBytes_FromStringAndSize(p, nchars);
-    PyMem_Free(p);
-    return new_bytes;
-}
 
 static int
 pymain_run_command(wchar_t *command)
@@ -276,18 +245,16 @@ pymain_run_command(wchar_t *command)
         return pymain_exit_err_print();
     }
 
-    bytes = PyUnicode_AsUTF8String(unicode);
-    Py_DECREF(unicode);
-    if (bytes == NULL) {
+    Py_SETREF(unicode, _PyUnicode_Dedent(unicode));
+    if (unicode == NULL) {
         goto error;
     }
 
-    PyObject *new_bytes = dedent_utf8_bytes(bytes);
-    if (new_bytes == NULL) {
-        Py_DECREF(bytes);
+    bytes = PyUnicode_AsUTF8String(unicode);
+    Py_DECREF(unicode);
+    if (bytes == NULL) {
         goto error;
     }
-    Py_SETREF(bytes, new_bytes);
 
     PyCompilerFlags cf = _PyCompilerFlags_INIT;
     cf.cf_flags |= PyCF_IGNORE_COOKIE;
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 6de0218459939c..fdfef6cbcdeb06 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3559,139 +3559,3 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
         }
     }
 }
-
-/** Dedent a UTF-8 encoded string.
- * behavior is expected to match `textwrap.dedent`
- *
- * return value:
- * 0, no need to dedent, `dest` buffer and `*dest_len` untouched
- * 1, success
- *
- * `src` is the string to dedent.
- * expecting `(src != NULL)`
- *
- * `src_len` is the length of `src`.
- *
- * `dest` is a buffer for the result.
- * expecting `(dest != NULL)`
- *
- * `*dest_len` stores the length of `dest` on entry, and is updated to the
- * length of the dedent result upon success. Output buffer should be large
- * enough to hold the result.
- * expecting `(dest_len != NULL && *dest_len >= src_len)`
- */
-int
-_PyBytes_Dedent(const char *src, Py_ssize_t src_len, char *dest,
-                Py_ssize_t *dest_len) {
-    assert(src && dest && dest_len);
-    assert(*dest_len >= src_len);
-
-    if (src_len <= 0)
-        return 0;
-
-    const char *end = src + src_len;
-    assert(src < end); // prevent overflow when src_len is too large
-
-    const char *candidate_start = NULL;
-    Py_ssize_t candidate_len = 0;
-
-    for (const char *iter = src; iter < end; ++iter) {
-        const char *line_start = iter;
-        const char *leading_whitespace_end = NULL;
-
-        // scan the whole line
-        while (iter < end && *iter != '\n') {
-            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
-                if (iter == line_start) {
-                    // some line has no indent, fast exit!
-                    return 0;
-                }
-                leading_whitespace_end = iter;
-            }
-            ++iter;
-        }
-
-        // if this line has all white space, skip it
-        if (!leading_whitespace_end) {
-            continue;
-        }
-
-        if (!candidate_start) {
-            candidate_start = line_start;
-            candidate_len = leading_whitespace_end - line_start;
-            assert(candidate_len > 0);
-        } else {
-            /* We then compare with the current longest leading whitespace.
-
-               [line_start, leading_whitespace_end) is the leading whitespace of
-               this line,
-
-               [candidate_start, candidate_start + candidate_len)
-               is the leading whitespace of the current longest leading
-               whitespace. */
-            Py_ssize_t new_candidate_len = 0;
-
-            for (const char *candidate_iter = candidate_start,
-                            *line_iter = line_start;
-                 candidate_iter < candidate_start + candidate_len &&
-                 line_iter < leading_whitespace_end;
-                 ++candidate_iter, ++line_iter) {
-                if (*candidate_iter != *line_iter) {
-                    break;
-                }
-                ++new_candidate_len;
-            }
-
-            candidate_len = new_candidate_len;
-            if (candidate_len == 0) {
-                return 0;
-            }
-        }
-    }
-
-    assert(candidate_len >= 0);
-    if (candidate_len == 0) {
-        return 0;
-    }
-
-    // trigger a dedent
-    char *dest_start = dest;
-
-    for (const char *iter = src; iter < end; ++iter) {
-        const char *line_start = iter;
-        bool in_leading_space = true;
-
-        // iterate over a line to find the end of a line
-        while (iter < end && *iter != '\n') {
-            if (in_leading_space && *iter != ' ' && *iter != '\t') {
-                in_leading_space = false;
-            }
-            ++iter;
-        }
-
-        // invariant: *iter == '\n' or iter == end
-        bool append_newline = iter < end;
-
-        // if this line has all white space, write '\n'
-        if (in_leading_space && append_newline) {
-            *dest++ = '\n';
-            continue;
-        }
-
-        /* copy [new_line_start + candidate_len, iter) to buffer, then
-            conditionally append '\n' */
-
-        Py_ssize_t new_line_len = iter - line_start - candidate_len;
-        assert(new_line_len >= 0);
-
-        memcpy(dest, line_start + candidate_len, new_line_len);
-
-        dest += new_line_len;
-
-        if (append_newline) {
-            *dest++ = '\n';
-        }
-    }
-    *dest_len = dest - dest_start;
-    return 1;
-}
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index fe2660c6ce6058..284185756f18bc 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -13343,6 +13343,147 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
     return Py_BuildValue("(N)", copy);
 }
 
+/* Dedent a string.
+   Behaviour is expected to be an exact match of `textwrap.dedent`.
+   Return a new reference on success, NULL with exception set on error.
+   */
+PyAPI_FUNC(PyObject *)
+_PyUnicode_Dedent(PyObject *unicode)
+{
+    Py_ssize_t src_len = 0;
+    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
+    if (!src) {
+        return NULL;
+    }
+    if (src_len <= 0) {
+        Py_INCREF(unicode);
+        return unicode;
+    }
+
+    const char *end = src + src_len;
+
+    // [candidate_start, candidate_start + candidate_len)
+    // describes the current longest common leading whitespace
+    const char *candidate_start = NULL;
+    Py_ssize_t candidate_len = 0;
+
+    for (const char *iter = src; iter < end; ++iter) {
+        const char *line_start = iter;
+        const char *leading_whitespace_end = NULL;
+
+        // scan the whole line
+        while (iter < end && *iter != '\n') {
+            if (!leading_whitespace_end && *iter != ' ' && *iter != '\t') {
+                /* `iter` points to the first non-whitespace character
+                   in this line */
+                if (iter == line_start) {
+                    // some line has no indent, fast exit!
+                    Py_INCREF(unicode);
+                    return unicode;
+                }
+                leading_whitespace_end = iter;
+            }
+            ++iter;
+        }
+
+        // if this line has all white space, skip it
+        if (!leading_whitespace_end) {
+            continue;
+        }
+
+        if (!candidate_start) {
+            // update the first leading whitespace
+            candidate_start = line_start;
+            candidate_len = leading_whitespace_end - line_start;
+            assert(candidate_len > 0);
+        } else {
+            /* We then compare with the current longest leading whitespace.
+
+               [line_start, leading_whitespace_end) is the leading whitespace of
+               this line,
+
+               [candidate_start, candidate_start + candidate_len)
+               is the leading whitespace of the current longest leading
+               whitespace. */
+            Py_ssize_t new_candidate_len = 0;
+
+            for (const char *candidate_iter = candidate_start,
+                            *line_iter = line_start;
+                 candidate_iter < candidate_start + candidate_len &&
+                 line_iter < leading_whitespace_end;
+                 ++candidate_iter, ++line_iter) {
+                if (*candidate_iter != *line_iter) {
+                    break;
+                }
+                ++new_candidate_len;
+            }
+
+            candidate_len = new_candidate_len;
+            if (candidate_len == 0) {
+                // No common things now, fast exit!
+                Py_INCREF(unicode);
+                return unicode;
+            }
+        }
+    }
+
+    assert(candidate_len >= 0);
+    /* Final check for strings that contain nothing but whitespace. */
+    if (candidate_len == 0) {
+        Py_INCREF(unicode);
+        return unicode;
+    }
+
+    // now we should trigger a dedent
+    char *dest = PyMem_Malloc(src_len);
+    if (!dest) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    char *dest_iter = dest;
+
+    for (const char *iter = src; iter < end; ++iter) {
+        const char *line_start = iter;
+        bool in_leading_space = true;
+
+        // iterate over a line to find the end of a line
+        while (iter < end && *iter != '\n') {
+            if (in_leading_space && *iter != ' ' && *iter != '\t') {
+                in_leading_space = false;
+            }
+            ++iter;
+        }
+
+        // invariant: *iter == '\n' or iter == end
+        bool append_newline = iter < end;
+
+        // if this line has all white space, write '\n' and continue
+        if (in_leading_space && append_newline) {
+            *dest_iter++ = '\n';
+            continue;
+        }
+
+        /* copy [new_line_start + candidate_len, iter) to buffer, then
+            conditionally append '\n' */
+
+        Py_ssize_t new_line_len = iter - line_start - candidate_len;
+        assert(new_line_len >= 0);
+        memcpy(dest_iter, line_start + candidate_len, new_line_len);
+
+        dest_iter += new_line_len;
+
+        if (append_newline) {
+            *dest_iter++ = '\n';
+        }
+    }
+
+    Py_ssize_t dest_len = dest_iter - dest;
+
+    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_len);
+    PyMem_Free(dest);
+    return res;
+}
+
 static PyMethodDef unicode_methods[] = {
     UNICODE_ENCODE_METHODDEF
     UNICODE_REPLACE_METHODDEF

From 1735d0f900a357ce3825e43281b325da79ff594b Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Tue, 25 Jul 2023 00:13:06 +0800
Subject: [PATCH 31/42] Apply suggestions from code review

clean up things
---
 Modules/main.c        | 1 -
 Objects/bytesobject.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/Modules/main.c b/Modules/main.c
index 27cdcb042584fa..86505ac38418a9 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -7,7 +7,6 @@
 #include "pycore_pathconfig.h"    // _PyPathConfig_ComputeSysPath0()
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
-#include "pycore_bytesobject.h"   // _PyBytes_Dedent()
 
 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index fdfef6cbcdeb06..42dac3a41fe03c 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3535,8 +3535,6 @@ _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
 }
 
 
-/* Algorithms on bytes */
-
 void
 _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
     const char* src, Py_ssize_t len_src)

From d3681b71e2bb233870d0c2aeccecdbffdf9a8195 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Tue, 25 Jul 2023 10:14:29 +0800
Subject: [PATCH 32/42] clean up things

---
 Objects/bytesobject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 42dac3a41fe03c..6b9231a9fa7693 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -3557,3 +3557,4 @@ _PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
         }
     }
 }
+

From d1b4cd17d747a9c960e596fc2ecfb1a6be95b106 Mon Sep 17 00:00:00 2001
From: Jon Crall <erotemic@gmail.com>
Date: Thu, 10 Apr 2025 11:44:42 -0400
Subject: [PATCH 33/42] Update Misc/NEWS.d/next/Core and
 Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
---
 .../2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst              | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
index 2e033ba98e12b3..511ca8fa732fa6 100644
--- a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst	
@@ -2,5 +2,3 @@ String arguments passed to "-c" are now automatically dedented as if by
 :func:`textwrap.dedent`. This allows "python -c" invocations to be indented
 in shell scripts without causing indentation errors. (Patch by Jon Crall and
 Steven Sun)
-
-Add an internal API :c:func:`_PyUnicode_Dedent`. (Patch by Steven Sun)

From e556bbffb23f2e1d436e09f6da591c7acf05e9d2 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 10 Apr 2025 11:49:30 -0400
Subject: [PATCH 34/42] lint: space in folder name

---
 .../2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst                | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename Misc/NEWS.d/next/{Core and Builtins => Core_and_Builtins}/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst (100%)

diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
similarity index 100%
rename from Misc/NEWS.d/next/Core and Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst
rename to Misc/NEWS.d/next/Core_and_Builtins/2023-04-29-23-15-38.gh-issue-103997.BS3uVt.rst

From 136c8b0892e08a6662fc4c0d0e922c419a774892 Mon Sep 17 00:00:00 2001
From: joncrall <erotemic@gmail.com>
Date: Thu, 10 Apr 2025 11:54:46 -0400
Subject: [PATCH 35/42] Explicit include of pycore_unicodeobject.h

---
 Modules/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Modules/main.c b/Modules/main.c
index 3a5c9caac3ce3a..ea1239ecc57f00 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -11,6 +11,7 @@
 #include "pycore_pylifecycle.h"   // _Py_PreInitializeFromPyArgv()
 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
 #include "pycore_pythonrun.h"     // _PyRun_AnyFileObject()
+#include "pycore_unicodeobject.h" // _PyUnicode_Dedent()
 
 /* Includes for exit_sigint() */
 #include <stdio.h>                // perror()

From cd14a00bea12ba4dc326d008ec03ccbadfb2d627 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Thu, 17 Apr 2025 23:21:18 +0800
Subject: [PATCH 36/42] Apply suggestions from code review

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
---
 Lib/test/test_cmd_line.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index e1c6fb9c64380d..66768c28567c54 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -1096,13 +1096,13 @@ def test_cmd_dedent(self):
             args1 = sys.executable, '-c', case['code']
             proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
             self.assertEqual(proc1.returncode, 0, proc1)
-            output1 = proc1.stdout.strip()
+            output1 = proc1.stdout.strip().decode(encoding='utf-8')
 
             # Manually dedent beforehand, check the result is the same.
             args2 = sys.executable, '-c', dedent(case['code'])
             proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
             self.assertEqual(proc2.returncode, 0, proc2)
-            output2 = proc2.stdout.strip()
+            output2 = proc2.stdout.strip().decode(encoding='utf-8')
 
             self.assertEqual(output1, output2)
             self.assertEqual(output1.replace(b'\r\n', b'\n'), case['expected'])

From 07d2273ee1ad68689594a64e32c5e85b48facd97 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Fri, 18 Apr 2025 04:32:14 +0800
Subject: [PATCH 37/42] Resolve Comments

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
---
 Lib/test/test_cmd_line.py | 54 ++++++++++++++++++++++-----------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index 66768c28567c54..c4a38f80e0db4e 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -1055,28 +1055,32 @@ def test_cmd_dedent(self):
         # test that -c auto-dedents its arguments
         from textwrap import dedent
         test_cases = [
-            {
-                'code': '''
+            (
+                """
                     print('space-auto-dedent')
-                ''',
-                'expected': b'space-auto-dedent',
-            },
-            {
-                'code': dedent('''
+                """,
+                "space-auto-dedent",
+            ),
+            (
+                dedent(
+                    """
                 ^^^print('tab-auto-dedent')
-                ''').replace('^', '\t'),
-                'expected': b'tab-auto-dedent',
-            },
-            {
-                'code': dedent('''
+                """
+                ).replace("^", "\t"),
+                "tab-auto-dedent",
+            ),
+            (
+                dedent(
+                    """
                 ^^if 1:
                 ^^^^print('mixed-auto-dedent-1')
                 ^^print('mixed-auto-dedent-2')
-                ''').replace('^', '\t \t'),
-                'expected': b'mixed-auto-dedent-1\nmixed-auto-dedent-2',
-            },
-            {
-                'code': '''
+                """
+                ).replace("^", "\t \t"),
+                "mixed-auto-dedent-1\nmixed-auto-dedent-2",
+            ),
+            (
+                '''
                     data = """$
 
                     this data has an empty newline above and a newline with spaces below $
@@ -1084,28 +1088,30 @@ def test_cmd_dedent(self):
                     """$
                     if 1:         $
                         print(repr(data))$
-                '''.replace('$', ''),
+                '''.replace(
+                    "$", ""
+                ),
                 # Note: entirely blank lines are normalized to \n, even if they
                 # are part of a data string. This is consistent with
                 # textwrap.dedent behavior, but might not be intuitive.
-                'expected': b"'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
-            },
+                "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
+            ),
         ]
-        for case in test_cases:
+        for code, expected in test_cases:
             # Run the auto-dedent case
-            args1 = sys.executable, '-c', case['code']
+            args1 = sys.executable, '-c', code
             proc1 = subprocess.run(args1, stdout=subprocess.PIPE)
             self.assertEqual(proc1.returncode, 0, proc1)
             output1 = proc1.stdout.strip().decode(encoding='utf-8')
 
             # Manually dedent beforehand, check the result is the same.
-            args2 = sys.executable, '-c', dedent(case['code'])
+            args2 = sys.executable, '-c', dedent(code)
             proc2 = subprocess.run(args2, stdout=subprocess.PIPE)
             self.assertEqual(proc2.returncode, 0, proc2)
             output2 = proc2.stdout.strip().decode(encoding='utf-8')
 
             self.assertEqual(output1, output2)
-            self.assertEqual(output1.replace(b'\r\n', b'\n'), case['expected'])
+            self.assertEqual(output1.replace('\r\n', '\n'), expected)
 
     def test_cmd_dedent_failcase(self):
         # Mixing tabs and spaces is not allowed

From ed6e17bdd4792386ce625b49b21bbd410692f925 Mon Sep 17 00:00:00 2001
From: sunmy2019 <59365878+sunmy2019@users.noreply.github.com>
Date: Fri, 18 Apr 2025 11:30:22 +0800
Subject: [PATCH 38/42] Refactor implementation

---
 Include/internal/pycore_unicodeobject.h |   2 +-
 Lib/test/test_cmd_line.py               |  11 +-
 Objects/unicodeobject.c                 | 130 ++++++++++++++----------
 3 files changed, 85 insertions(+), 58 deletions(-)

diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index a5b2b28a1ab8b1..c85d53b89accdb 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -251,7 +251,7 @@ extern Py_ssize_t _PyUnicode_InsertThousandsGrouping(
    Behaviour is expected to be an exact match of `textwrap.dedent`.
    Return a new reference on success, NULL with exception set on error.
    */
-PyAPI_FUNC(PyObject*) _PyUnicode_Dedent(PyObject *unicode);
+extern PyObject* _PyUnicode_Dedent(PyObject *unicode);
 
 /* --- Misc functions ----------------------------------------------------- */
 
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index c4a38f80e0db4e..e1d1d03d4ff698 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -17,6 +17,8 @@
     spawn_python, kill_python, assert_python_ok, assert_python_failure,
     interpreter_requires_environment
 )
+from textwrap import dedent
+
 
 if not support.has_subprocess_support:
     raise unittest.SkipTest("test module requires subprocess")
@@ -1053,7 +1055,6 @@ def test_int_max_str_digits(self):
 
     def test_cmd_dedent(self):
         # test that -c auto-dedents its arguments
-        from textwrap import dedent
         test_cases = [
             (
                 """
@@ -1096,6 +1097,14 @@ def test_cmd_dedent(self):
                 # textwrap.dedent behavior, but might not be intuitive.
                 "'\\n\\nthis data has an empty newline above and a newline with spaces below \\n\\n'",
             ),
+            (
+                '',
+                '',
+            ),
+            (
+                '  \t\n\t\n \t\t\t  \t\t \t\n\t\t \n\n\n\t\t\t   ',
+                '',
+            ),
         ]
         for code, expected in test_cases:
             # Run the auto-dedent case
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2c1a803d22db2c..cc0fb70b5a66c2 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14270,29 +14270,22 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
     return Py_BuildValue("(N)", copy);
 }
 
-/* Dedent a string.
-   Behaviour is expected to be an exact match of `textwrap.dedent`.
-   Return a new reference on success, NULL with exception set on error.
-   */
-PyAPI_FUNC(PyObject *)
-_PyUnicode_Dedent(PyObject *unicode)
-{
-    Py_ssize_t src_len = 0;
-    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
-    if (!src) {
-        return NULL;
-    }
-    if (src_len <= 0) {
-        Py_INCREF(unicode);
-        return unicode;
-    }
-
-    const char *end = src + src_len;
-
-    // [candidate_start, candidate_start + candidate_len)
+/*
+This function searches for the longest common leading whitespace
+of all lines in the range [src, end).
+It returns the length of the common leading whitespace and sets `output` to
+point to the beginning of the common leading whitespace if length > 0.
+*/
+static Py_ssize_t
+search_longest_common_leading_whitespace(
+    const char * const src,
+    const char * const end,
+    const char * * output
+) {
+    // [_start, _start + _len)
     // describes the current longest common leading whitespace
-    const char *candidate_start = NULL;
-    Py_ssize_t candidate_len = 0;
+    const char *_start = NULL;
+    Py_ssize_t _len = 0;
 
     for (const char *iter = src; iter < end; ++iter) {
         const char *line_start = iter;
@@ -14305,8 +14298,7 @@ _PyUnicode_Dedent(PyObject *unicode)
                    in this line */
                 if (iter == line_start) {
                     // some line has no indent, fast exit!
-                    Py_INCREF(unicode);
-                    return unicode;
+                    return 0;
                 }
                 leading_whitespace_end = iter;
             }
@@ -14318,47 +14310,73 @@ _PyUnicode_Dedent(PyObject *unicode)
             continue;
         }
 
-        if (!candidate_start) {
+        if (!_start) {
             // update the first leading whitespace
-            candidate_start = line_start;
-            candidate_len = leading_whitespace_end - line_start;
-            assert(candidate_len > 0);
-        } else {
+            _start = line_start;
+            _len = leading_whitespace_end - line_start;
+            assert(_len > 0);
+        }
+        else {
             /* We then compare with the current longest leading whitespace.
 
-               [line_start, leading_whitespace_end) is the leading whitespace of
-               this line,
+               [line_start, leading_whitespace_end) is the leading
+               whitespace of this line,
 
-               [candidate_start, candidate_start + candidate_len)
-               is the leading whitespace of the current longest leading
-               whitespace. */
-            Py_ssize_t new_candidate_len = 0;
+               [_start, _start + _len) is the current longest common
+               leading whitespace found so far. */
+            Py_ssize_t new_len = 0;
+            const char *_iter = _start, *line_iter = line_start;
 
-            for (const char *candidate_iter = candidate_start,
-                            *line_iter = line_start;
-                 candidate_iter < candidate_start + candidate_len &&
-                 line_iter < leading_whitespace_end;
-                 ++candidate_iter, ++line_iter) {
-                if (*candidate_iter != *line_iter) {
-                    break;
-                }
-                ++new_candidate_len;
+            while (_iter < _start + _len && line_iter < leading_whitespace_end
+                && *_iter == *line_iter)
+            {
+                ++_iter;
+                ++line_iter;
+                ++new_len;
             }
 
-            candidate_len = new_candidate_len;
-            if (candidate_len == 0) {
+            _len = new_len;
+            if (_len == 0) {
                 // No common things now, fast exit!
-                Py_INCREF(unicode);
-                return unicode;
+                return 0;
             }
         }
     }
 
-    assert(candidate_len >= 0);
-    /* Final check for strings that contain nothing but whitespace. */
-    if (candidate_len == 0) {
-        Py_INCREF(unicode);
-        return unicode;
+    assert(_len >= 0);
+    if (_len > 0) {
+        *output = _start;
+    }
+    return _len;
+}
+
+/* Dedent a string.
+   Behaviour is expected to be an exact match of `textwrap.dedent`.
+   Return a new reference on success, NULL with exception set on error.
+   */
+PyObject *
+_PyUnicode_Dedent(PyObject *unicode)
+{
+    Py_ssize_t src_len = 0;
+    const char *src = PyUnicode_AsUTF8AndSize(unicode, &src_len);
+    if (!src) {
+        return NULL;
+    }
+    assert(src_len >= 0);
+    if (src_len == 0) {
+        return Py_NewRef(unicode);
+    }
+
+    const char *const end = src + src_len;
+
+    // [whitespace_start, whitespace_start + whitespace_len)
+    // describes the current longest common leading whitespace
+    const char *whitespace_start = NULL;
+    Py_ssize_t whitespace_len = search_longest_common_leading_whitespace(
+        src, end, &whitespace_start);
+
+    if (whitespace_len == 0) {
+        return Py_NewRef(unicode);
     }
 
     // now we should trigger a dedent
@@ -14390,12 +14408,12 @@ _PyUnicode_Dedent(PyObject *unicode)
             continue;
         }
 
-        /* copy [new_line_start + candidate_len, iter) to buffer, then
+        /* copy [line_start + whitespace_len, iter) to buffer, then
             conditionally append '\n' */
 
-        Py_ssize_t new_line_len = iter - line_start - candidate_len;
+        Py_ssize_t new_line_len = iter - line_start - whitespace_len;
         assert(new_line_len >= 0);
-        memcpy(dest_iter, line_start + candidate_len, new_line_len);
+        memcpy(dest_iter, line_start + whitespace_len, new_line_len);
 
         dest_iter += new_line_len;
 

From 4c78c5772d0f47d5957cedb34574619e8101dfce Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Fri, 18 Apr 2025 15:55:01 +0900
Subject: [PATCH 39/42] Apply suggestions from code review

---
 Objects/unicodeobject.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index cc0fb70b5a66c2..577e350dfb4f05 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14280,8 +14280,8 @@ static Py_ssize_t
 search_longest_common_leading_whitespace(
     const char * const src,
     const char * const end,
-    const char * * output
-) {
+    const char * * output)
+{
     // [_start, _start + _len)
     // describes the current longest common leading whitespace
     const char *_start = NULL;
@@ -14422,9 +14422,7 @@ _PyUnicode_Dedent(PyObject *unicode)
         }
     }
 
-    Py_ssize_t dest_len = dest_iter - dest;
-
-    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_len);
+    PyObject *res = PyUnicode_FromStringAndSize(dest, dest_iter - dest);
     PyMem_Free(dest);
     return res;
 }

From 38d2a4ec693f2cea2192fd9ae3ab288e8297ef3a Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Fri, 18 Apr 2025 16:03:08 +0900
Subject: [PATCH 40/42] add what's new entry

---
 Doc/whatsnew/3.14.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index 7d469e83dc27ad..dda2f1a2a8f188 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -474,6 +474,11 @@ Other language changes
   explicitly overridden in the subclass.
   (Contributed by Tomasz Pytel in :gh:`132329`.)
 
+* The command line option :option:`-c` now automatically dedents its code
+  argument before execution. The auto-dedentation behavior mirrors
+  :func:`textwrap.dedent`.
+  (Contributed by Jon Crall and Steven Sun in :gh:`103998`.)
+
 .. _whatsnew314-pep765:
 
 PEP 765: Disallow return/break/continue that exit a finally block

From 42b633095a2d9290eb7415243ce4d0aa1772f398 Mon Sep 17 00:00:00 2001
From: Inada Naoki <songofacandy@gmail.com>
Date: Fri, 18 Apr 2025 16:10:13 +0900
Subject: [PATCH 41/42] Document dedentation of command in version 3.14

---
 Doc/using/cmdline.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 9b5c6eb863e56d..2e9d8b7bf9590e 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -73,6 +73,9 @@ source.
 
    .. audit-event:: cpython.run_command command cmdoption-c
 
+   .. versionchanged:: 3.14
+      *command* is automatically dedented before execution.
+
 .. option:: -m <module-name>
 
    Search :data:`sys.path` for the named module and execute its contents as

From 98c17e5dc1764d9aa66f9706d72eed269e2b9993 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 18 Apr 2025 10:06:36 +0200
Subject: [PATCH 42/42] Apply suggestions from code review

---
 Doc/using/cmdline.rst   | 2 +-
 Doc/whatsnew/3.14.rst   | 1 +
 Objects/unicodeobject.c | 8 ++++----
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst
index 2e9d8b7bf9590e..fa7c9cddf9c6d6 100644
--- a/Doc/using/cmdline.rst
+++ b/Doc/using/cmdline.rst
@@ -73,7 +73,7 @@ source.
 
    .. audit-event:: cpython.run_command command cmdoption-c
 
-   .. versionchanged:: 3.14
+   .. versionchanged:: next
       *command* is automatically dedented before execution.
 
 .. option:: -m <module-name>
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst
index dda2f1a2a8f188..aaa4702d53df93 100644
--- a/Doc/whatsnew/3.14.rst
+++ b/Doc/whatsnew/3.14.rst
@@ -479,6 +479,7 @@ Other language changes
   :func:`textwrap.dedent`.
   (Contributed by Jon Crall and Steven Sun in :gh:`103998`.)
 
+
 .. _whatsnew314-pep765:
 
 PEP 765: Disallow return/break/continue that exit a finally block
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 577e350dfb4f05..e01a10fc19e904 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -14278,9 +14278,9 @@ point to the beginning of the common leading whitespace if length > 0.
 */
 static Py_ssize_t
 search_longest_common_leading_whitespace(
-    const char * const src,
-    const char * const end,
-    const char * * output)
+    const char *const src,
+    const char *const end,
+    const char **output)
 {
     // [_start, _start + _len)
     // describes the current longest common leading whitespace
@@ -14328,7 +14328,7 @@ search_longest_common_leading_whitespace(
             const char *_iter = _start, *line_iter = line_start;
 
             while (_iter < _start + _len && line_iter < leading_whitespace_end
-                && *_iter == *line_iter)
+                   && *_iter == *line_iter)
             {
                 ++_iter;
                 ++line_iter;