Skip to content

Commit 69dc30c

Browse files
committed
Optimize wcwidth()
1 parent 3b1a268 commit 69dc30c

File tree

1 file changed

+35
-22
lines changed

1 file changed

+35
-22
lines changed

wcwidth/wcwidth.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,36 @@
7474
from .table_zero import ZERO_WIDTH
7575

7676

77-
def _bisearch(ucs, table):
77+
# NOTE: this set was created by hand. Nothing other than the general
78+
# category code Cf identifies these characters, and some characters in
79+
# category Cf have non-zero width.
80+
# Also includes some Cc, Mn, Zl, and Zp characters.
81+
ZERO_WIDTH_CF = set([
82+
0, # Null (Cc)
83+
0x034F, # Combining grapheme joiner (Mn)
84+
0x200B, # Zero width space
85+
0x200C, # Zero width non-joiner
86+
0x200D, # Zero width joiner
87+
0x200E, # Left-to-right mark
88+
0x200F, # Right-to-left mark
89+
0x2028, # Line separator (Zl)
90+
0x2029, # Paragraph separator (Zp)
91+
0x202A, # Left-to-right embedding
92+
0x202B, # Right-to-left embedding
93+
0x202C, # Pop directional formatting
94+
0x202D, # Left-to-right override
95+
0x202E, # Right-to-left override
96+
0x2060, # Word joiner
97+
0x2061, # Function application
98+
0x2062, # Invisible times
99+
0x2063, # Invisible separator
100+
])
101+
102+
UBOUND_ZERO_WIDTH = len(ZERO_WIDTH) - 1
103+
UBOUND_WIDE_EASTASIAN = len(WIDE_EASTASIAN) - 1
104+
105+
106+
def _bisearch(ucs, table, ubound):
78107
"""
79108
Auxiliary function for binary search in interval table.
80109
@@ -85,7 +114,6 @@ def _bisearch(ucs, table):
85114
:returns: 1 if ordinal value ucs is found within lookup table, else 0.
86115
"""
87116
lbound = 0
88-
ubound = len(table) - 1
89117

90118
if ucs < table[0][0] or ucs > table[ubound][1]:
91119
return 0
@@ -101,7 +129,7 @@ def _bisearch(ucs, table):
101129
return 0
102130

103131

104-
def wcwidth(wc):
132+
def wcwidth(wc): # pylint: disable=invalid-name
105133
r"""
106134
Given one unicode character, return its printable length on a terminal.
107135
@@ -152,34 +180,20 @@ def wcwidth(wc):
152180
Full-width (F) category as defined in Unicode Technical
153181
Report #11 have a column width of 2.
154182
"""
155-
# pylint: disable=C0103
156-
# Invalid argument name "wc"
157183
ucs = ord(wc)
158184

159-
# NOTE: created by hand, there isn't anything identifiable other than
160-
# general Cf category code to identify these, and some characters in Cf
161-
# category code are of non-zero width.
162-
163-
# pylint: disable=too-many-boolean-expressions
164-
# Too many boolean expressions in if statement (7/5)
165-
if (ucs == 0 or
166-
ucs == 0x034F or
167-
0x200B <= ucs <= 0x200F or
168-
ucs == 0x2028 or
169-
ucs == 0x2029 or
170-
0x202A <= ucs <= 0x202E or
171-
0x2060 <= ucs <= 0x2063):
185+
if ucs in ZERO_WIDTH_CF:
172186
return 0
173187

174188
# C0/C1 control characters
175189
if ucs < 32 or 0x07F <= ucs < 0x0A0:
176190
return -1
177191

178192
# combining characters with zero width
179-
if _bisearch(ucs, ZERO_WIDTH):
193+
if _bisearch(ucs, ZERO_WIDTH, UBOUND_ZERO_WIDTH):
180194
return 0
181195

182-
return 1 + _bisearch(ucs, WIDE_EASTASIAN)
196+
return 1 + _bisearch(ucs, WIDE_EASTASIAN, UBOUND_WIDE_EASTASIAN)
183197

184198

185199
def wcswidth(pwcs, n=None):
@@ -202,6 +216,5 @@ def wcswidth(pwcs, n=None):
202216
wcw = wcwidth(char)
203217
if wcw < 0:
204218
return -1
205-
else:
206-
width += wcw
219+
width += wcw
207220
return width

0 commit comments

Comments
 (0)