Skip to content

Commit 5688e6a

Browse files
committed
GH-96172 unicodedata: fix wrong default for east_asian_width
Also return the correct width for unassigned but reserved characters, according to EastAsianWidth.txt.
1 parent 18b1782 commit 5688e6a

File tree

3 files changed

+598
-547
lines changed

3 files changed

+598
-547
lines changed

Lib/test/test_unicodedata.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,18 @@ def test_east_asian_width(self):
220220
self.assertEqual(eaw('\u2010'), 'A')
221221
self.assertEqual(eaw('\U00020000'), 'W')
222222

223+
def test_east_asian_width_unassigned(self):
    # Code points that carry no character name must still report an
    # East Asian Width: 'N' for plain unassigned code points, 'W' for
    # code points that are unassigned but reserved.
    # NOTE(review): the expected values presumably follow the ranges
    # listed in EastAsianWidth.txt -- confirm against the data file
    # when the Unicode database is updated.
    eaw = self.db.east_asian_width
    # (expected width, sample code points) for each category checked.
    cases = (
        # unassigned
        ('N', '\u0530\u0ece\u10c6\u20fc\uaaca\U000107bd\U000115f2'),
        # unassigned but reserved
        ('W', '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD'),
    )
    for width, chars in cases:
        for char in chars:
            # subTest names the failing code point in the report and
            # lets the remaining code points run instead of stopping
            # at the first mismatch.
            with self.subTest(char=char, expected=width):
                self.assertEqual(eaw(char), width)
                # No name is available for these code points, so the
                # lookup must hand back the supplied default (None).
                self.assertIsNone(self.db.name(char, None))
234+
223235
def test_east_asian_width_9_0_changes(self):
224236
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
225237
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')

0 commit comments

Comments
 (0)