Skip to content

Commit 9c197bc

Browse files
authored
GH-96172 fix unicodedata.east_asian_width being wrong on unassigned code points (#96207)
1 parent c1581a9 commit 9c197bc

File tree

4 files changed

+614
-548
lines changed

4 files changed

+614
-548
lines changed

Lib/test/test_unicodedata.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
7171

7272
# Update this if the database changes. Make sure to do a full rebuild
7373
# (e.g. 'make distclean && make') to get the correct checksum.
74-
expectedchecksum = '98d602e1f69d5c5bb8a5910c40bbbad4e18e8370'
74+
expectedchecksum = '4975f3ec0acd4a62465d18c9bf8519b1964181f6'
7575

7676
@requires_resource('cpu')
7777
def test_function_checksum(self):
@@ -90,6 +90,7 @@ def test_function_checksum(self):
9090
self.db.decomposition(char),
9191
str(self.db.mirrored(char)),
9292
str(self.db.combining(char)),
93+
unicodedata.east_asian_width(char),
9394
]
9495
h.update(''.join(data).encode("ascii"))
9596
result = h.hexdigest()
@@ -220,6 +221,23 @@ def test_east_asian_width(self):
220221
self.assertEqual(eaw('\u2010'), 'A')
221222
self.assertEqual(eaw('\U00020000'), 'W')
222223

224+
def test_east_asian_width_unassigned(self):
225+
eaw = self.db.east_asian_width
226+
# unassigned
227+
for char in '\u0530\u0ece\u10c6\u20fc\uaaca\U000107bd\U000115f2':
228+
self.assertEqual(eaw(char), 'N')
229+
self.assertIs(self.db.name(char, None), None)
230+
231+
# unassigned but reserved for CJK
232+
for char in '\uFA6E\uFADA\U0002A6E0\U0002FA20\U0003134B\U0003FFFD':
233+
self.assertEqual(eaw(char), 'W')
234+
self.assertIs(self.db.name(char, None), None)
235+
236+
# private use areas
237+
for char in '\uE000\uF800\U000F0000\U000FFFEE\U00100000\U0010FFF0':
238+
self.assertEqual(eaw(char), 'A')
239+
self.assertIs(self.db.name(char, None), None)
240+
223241
def test_east_asian_width_9_0_changes(self):
224242
self.assertEqual(self.db.ucd_3_2_0.east_asian_width('\u231a'), 'N')
225243
self.assertEqual(self.db.east_asian_width('\u231a'), 'W')
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fix a bug in ``unicodedata``: ``east_asian_width`` used to return the wrong
2+
value for unassigned characters, and for as-yet-unassigned but reserved
3+
characters.

0 commit comments

Comments
 (0)