From 8ae6408a7719604e1eb57b8ee0fcfdba94d3ae1b Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Sun, 13 Sep 2020 02:03:17 +0800
Subject: [PATCH 01/11] encodings.normalize_encoding() should ignore non-ASCII
 letters

---
 Lib/encodings/__init__.py                           |  3 ++-
 Lib/test/test_source_encoding.py                    | 13 +++++++++++++
 .../2020-09-13-02-02-18.bpo-39337.L3NXTt.rst        |  2 ++
 3 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst

diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ddd5afdcf2dab0..4b37d3321c9033 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
             punct = False
         else:
             punct = True
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index b410c03221bf32..eb24176c0a259b 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -226,5 +226,18 @@ def check_script_output(self, src, expected):
         self.assertEqual(res.out.rstrip(), expected)
 
 
+class EncodingsTest(unittest.TestCase):
+
+    def test_bpo39337(self):
+        """
+        bpo-39337: similar to _Py_normalize_encoding(),
+        encodings.normalize_encoding() should ignore non-ASCII letters .
+        """
+        import encodings
+
+        out = encodings.normalize_encoding("���-8")
+        self.assertEqual(out, '8')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
new file mode 100644
index 00000000000000..f734592f23c851
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1,2 @@
+similar to :c:func:`_Py_normalize_encoding`,
+:func:`encodings.normalize_encoding` should ignore non-ASCII letters.

From 0fcafb883cd25b0f1511c88245e29617f33f48b5 Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Sun, 13 Sep 2020 02:06:20 +0800
Subject: [PATCH 02/11] update test

update test
---
 Lib/test/test_source_encoding.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index eb24176c0a259b..a18ac241d0041a 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -14,11 +14,11 @@ class MiscSourceEncodingTest(unittest.TestCase):
 
     def test_pep263(self):
         self.assertEqual(
-            "�����".encode("utf-8"),
+            "ðÉÔÏÎ".encode("utf-8"),
             b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
         )
         self.assertEqual(
-            "\�".encode("utf-8"),
+            "\ð".encode("utf-8"),
             b'\\\xd0\x9f'
         )
 
@@ -235,7 +235,7 @@ def test_bpo39337(self):
         """
         import encodings
 
-        out = encodings.normalize_encoding("���-8")
+        out = encodings.normalize_encoding("кои-8")
         self.assertEqual(out, '8')
 
 

From dea24d84d6b171c1ffc379b300c6ee31436683d9 Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Sun, 13 Sep 2020 02:23:37 +0800
Subject: [PATCH 03/11] Revert "update test"

This reverts commit 0fcafb883cd25b0f1511c88245e29617f33f48b5.
---
 Lib/test/test_source_encoding.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index a18ac241d0041a..eb24176c0a259b 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -14,11 +14,11 @@ class MiscSourceEncodingTest(unittest.TestCase):
 
     def test_pep263(self):
         self.assertEqual(
-            "ðÉÔÏÎ".encode("utf-8"),
+            "�����".encode("utf-8"),
             b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
         )
         self.assertEqual(
-            "\ð".encode("utf-8"),
+            "\�".encode("utf-8"),
             b'\\\xd0\x9f'
         )
 
@@ -235,7 +235,7 @@ def test_bpo39337(self):
         """
         import encodings
 
-        out = encodings.normalize_encoding("кои-8")
+        out = encodings.normalize_encoding("���-8")
         self.assertEqual(out, '8')
 
 

From dbb0062bd00f1dc9fd889eac8de1a7e43f5f4fce Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Sat, 3 Oct 2020 13:41:17 +0800
Subject: [PATCH 04/11] Address Victor's review: escape test input, reword NEWS

---
 Lib/test/test_source_encoding.py                              | 4 ++--
 .../next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst     | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index eb24176c0a259b..59261718cfb1ff 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -235,8 +235,8 @@ def test_bpo39337(self):
         """
         import encodings
 
-        out = encodings.normalize_encoding("���-8")
-        self.assertEqual(out, '8')
+        out = encodings.normalize_encoding("utf\xE9\u20AC\U0010ffff-8")
+        self.assertEqual(out, 'utf_8')
 
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
index f734592f23c851..ea432d47c2831e 100644
--- a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -1,2 +1 @@
-similar to :c:func:`_Py_normalize_encoding`,
-:func:`encodings.normalize_encoding` should ignore non-ASCII letters.
+:func:`encodings.normalize_encoding` now ignores non-ASCII letters.

From 3fa221f25ebc82096f806506f4e93d8494f88c97 Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Fri, 9 Oct 2020 12:34:04 +0800
Subject: [PATCH 05/11] Address Victor's review: move test to test_codecs.py

---
 Lib/test/test_codecs.py          | 17 +++++++++++++++++
 Lib/test/test_source_encoding.py | 13 -------------
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 3dd56820cd1078..13e075e4947f17 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3403,5 +3403,22 @@ def test_rot13_func(self):
             'To be, or not to be, that is the question')
 
 
+class EncodingNormalizationTest(unittest.TestCase):
+
+    def test_bpo39337(self):
+        """
+        bpo-39337: similar to _Py_normalize_encoding(),
+        encodings.normalize_encoding() should ignore non-ASCII letters.
+        """
+        import encodings
+
+        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf_8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf   8')
+        self.assertEqual(out, 'utf_8')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py
index 59261718cfb1ff..b410c03221bf32 100644
--- a/Lib/test/test_source_encoding.py
+++ b/Lib/test/test_source_encoding.py
@@ -226,18 +226,5 @@ def check_script_output(self, src, expected):
         self.assertEqual(res.out.rstrip(), expected)
 
 
-class EncodingsTest(unittest.TestCase):
-
-    def test_bpo39337(self):
-        """
-        bpo-39337: similar to _Py_normalize_encoding(),
-        encodings.normalize_encoding() should ignore non-ASCII letters .
-        """
-        import encodings
-
-        out = encodings.normalize_encoding("utf\xE9\u20AC\U0010ffff-8")
-        self.assertEqual(out, 'utf_8')
-
-
 if __name__ == "__main__":
     unittest.main()

From 2e73d13ef9e7999ac7fe575c5eb380f41d59cb37 Mon Sep 17 00:00:00 2001
From: hai shi <shihai1991@126.com>
Date: Sat, 10 Oct 2020 08:34:08 +0800
Subject: [PATCH 06/11] Address Victor's review: rename test, add more cases

---
 Lib/test/test_codecs.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 40d2da7da7b104..d579f23c25d414 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3442,19 +3442,18 @@ def search_function(encoding):
 
 class EncodingNormalizationTest(unittest.TestCase):
 
-    def test_bpo39337(self):
-        """
-        bpo-39337: similar to _Py_normalize_encoding(),
-        encodings.normalize_encoding() should ignore non-ASCII letters.
-        """
-        import encodings
-
+    def test_normalization(self):
+        # encodings.normalize_encoding() ignores non-ASCII letters.
         out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
         self.assertEqual(out, 'utf_8')
         out = encodings.normalize_encoding('utf_8')
         self.assertEqual(out, 'utf_8')
         out = encodings.normalize_encoding('utf   8')
         self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('UTF 8')
+        self.assertEqual(out, 'UTF_8')
+        out = encodings.normalize_encoding('utf...8')
+        self.assertEqual(out, 'utf...8')
 
 
 if __name__ == "__main__":

From 95c1d980d08200d622d04ed559c0449cb102c9ec Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Mon, 12 Oct 2020 08:35:46 +0800
Subject: [PATCH 07/11] Address Victor's review: merge test into CodecNameNormalizationTest

---
 Lib/test/test_codecs.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index d579f23c25d414..7497c6615d2cd0 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ def test_rot13_func(self):
 
 class CodecNameNormalizationTest(unittest.TestCase):
     """Test codec name normalization"""
-    def test_normalized_encoding(self):
+    def test_codecs_lookup(self):
         FOUND = (1, 2, 3, 4)
         NOT_FOUND = (None, None, None, None)
         def search_function(encoding):
@@ -3439,19 +3439,20 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
-
-class EncodingNormalizationTest(unittest.TestCase):
-
-    def test_normalization(self):
+    def test_encodings_normalize_encoding(self):
         # encodings.normalize_encoding() ignores non-ASCII letters.
-        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
-        self.assertEqual(out, 'utf_8')
         out = encodings.normalize_encoding('utf_8')
         self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
+        self.assertEqual(out, 'utf_8')
         out = encodings.normalize_encoding('utf   8')
         self.assertEqual(out, 'utf_8')
+        # encodings.normalize_encoding() doesn't convert
+        # characters to lower case.
         out = encodings.normalize_encoding('UTF 8')
         self.assertEqual(out, 'UTF_8')
+        out = encodings.normalize_encoding('utf.8')
+        self.assertEqual(out, 'utf.8')
         out = encodings.normalize_encoding('utf...8')
         self.assertEqual(out, 'utf...8')
 

From 03bfd9b9266b99e88c4018be6e819b8595ff19fc Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Mon, 12 Oct 2020 22:17:32 +0800
Subject: [PATCH 08/11] Address Victor's review: inline normalize_encoding() calls in asserts

---
 Lib/test/test_codecs.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 7497c6615d2cd0..641ffbd9ee408c 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3441,20 +3441,16 @@ def search_function(encoding):
 
     def test_encodings_normalize_encoding(self):
         # encodings.normalize_encoding() ignores non-ASCII letters.
-        out = encodings.normalize_encoding('utf_8')
-        self.assertEqual(out, 'utf_8')
-        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
-        self.assertEqual(out, 'utf_8')
-        out = encodings.normalize_encoding('utf   8')
-        self.assertEqual(out, 'utf_8')
+        self.assertEqual(encodings.normalize_encoding('utf_8'), 'utf_8')
+        self.assertEqual(
+                encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8'),
+                'utf_8')
+        self.assertEqual(encodings.normalize_encoding('utf   8'), 'utf_8')
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
-        out = encodings.normalize_encoding('UTF 8')
-        self.assertEqual(out, 'UTF_8')
-        out = encodings.normalize_encoding('utf.8')
-        self.assertEqual(out, 'utf.8')
-        out = encodings.normalize_encoding('utf...8')
-        self.assertEqual(out, 'utf...8')
+        self.assertEqual(encodings.normalize_encoding('UTF 8'), 'UTF_8')
+        self.assertEqual(encodings.normalize_encoding('utf.8'), 'utf.8')
+        self.assertEqual(encodings.normalize_encoding('utf...8'), 'utf...8')
 
 
 if __name__ == "__main__":

From 38f28bd5ad2623c78b7570fb71abe30b7a78c3ec Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Tue, 13 Oct 2020 08:02:39 +0800
Subject: [PATCH 09/11] Address Victor's review: alias normalize_encoding in the test

---
 Lib/test/test_codecs.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 641ffbd9ee408c..011f5474e3ca5c 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3441,16 +3441,15 @@ def search_function(encoding):
 
     def test_encodings_normalize_encoding(self):
         # encodings.normalize_encoding() ignores non-ASCII letters.
-        self.assertEqual(encodings.normalize_encoding('utf_8'), 'utf_8')
-        self.assertEqual(
-                encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8'),
-                'utf_8')
-        self.assertEqual(encodings.normalize_encoding('utf   8'), 'utf_8')
+        normalize = encodings.normalize_encoding
+        self.assertEqual(normalize('utf_8'), 'utf_8')
+        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+        self.assertEqual(normalize('utf   8'), 'utf_8')
         # encodings.normalize_encoding() doesn't convert
         # characters to lower case.
-        self.assertEqual(encodings.normalize_encoding('UTF 8'), 'UTF_8')
-        self.assertEqual(encodings.normalize_encoding('utf.8'), 'utf.8')
-        self.assertEqual(encodings.normalize_encoding('utf...8'), 'utf...8')
+        self.assertEqual(normalize('UTF 8'), 'UTF_8')
+        self.assertEqual(normalize('utf.8'), 'utf.8')
+        self.assertEqual(normalize('utf...8'), 'utf...8')
 
 
 if __name__ == "__main__":

From 5982784bf66fc8dbd8dac0f3882af5844496b824 Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Tue, 13 Oct 2020 17:06:30 +0800
Subject: [PATCH 10/11] Address Victor's review: add What's New in 3.10 entry

---
 Doc/whatsnew/3.10.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 4ada4be3b66715..fcf42d8837bf0f 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
 support is provided by the underlying ncurses library.
 (Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
 
+encodings
+---------
+:func:`encodings.normalize_encoding` now ignores non-ASCII letters.
+(Contributed by Hai Shi in :issue:`39337`.)
+
 glob
 ----
 

From 4ecb8a1bb1f5e973e8d3eaeb0685d849d4023555 Mon Sep 17 00:00:00 2001
From: Hai Shi <shihai1992@gmail.com>
Date: Wed, 14 Oct 2020 07:52:09 +0800
Subject: [PATCH 11/11] Address Victor's review: say "characters" instead of "letters"

---
 Doc/whatsnew/3.10.rst                                           | 2 +-
 Lib/test/test_codecs.py                                         | 2 +-
 .../next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index fcf42d8837bf0f..c34c36ae102a05 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -188,7 +188,7 @@ support is provided by the underlying ncurses library.
 
 encodings
 ---------
-:func:`encodings.normalize_encoding` now ignores non-ASCII letters.
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
 (Contributed by Hai Shi in :issue:`39337`.)
 
 glob
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 011f5474e3ca5c..09ceef76eb098d 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3440,7 +3440,7 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
     def test_encodings_normalize_encoding(self):
-        # encodings.normalize_encoding() ignores non-ASCII letters.
+        # encodings.normalize_encoding() ignores non-ASCII characters.
         normalize = encodings.normalize_encoding
         self.assertEqual(normalize('utf_8'), 'utf_8')
         self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
index ea432d47c2831e..c2b4dbe4d12e8e 100644
--- a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -1 +1 @@
-:func:`encodings.normalize_encoding` now ignores non-ASCII letters.
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.