74
74
from .table_zero import ZERO_WIDTH
75
75
76
76
77
- def _bisearch (ucs , table ):
77
+ # NOTE: created by hand, there isn't anything identifiable other than
78
+ # general Cf category code to identify these, and some characters in Cf
79
+ # category code are of non-zero width.
80
+ # Also includes some Cc, Mn, Zl, and Zp characters
81
+ ZERO_WIDTH_CF = set ([
82
+ 0 , # Null (Cc)
83
+ 0x034F , # Combining grapheme joiner (Mn)
84
+ 0x200B , # Zero width space
85
+ 0x200C , # Zero width non-joiner
86
+ 0x200D , # Zero width joiner
87
+ 0x200E , # Left-to-right mark
88
+ 0x200F , # Right-to-left mark
89
+ 0x2028 , # Line separator (Zl)
90
+ 0x2029 , # Paragraph separator (Zp)
91
+ 0x202A , # Left-to-right embedding
92
+ 0x202B , # Right-to-left embedding
93
+ 0x202C , # Pop directional formatting
94
+ 0x202D , # Left-to-right override
95
+ 0x202E , # Right-to-left override
96
+ 0x2060 , # Word joiner
97
+ 0x2061 , # Function application
98
+ 0x2062 , # Invisible times
99
+ 0x2063 , # Invisible separator
100
+ ])
101
+
102
+ UBOUND_ZERO_WIDTH = len (ZERO_WIDTH ) - 1
103
+ UBOUND_WIDE_EASTASIAN = len (WIDE_EASTASIAN ) - 1
104
+
105
+
106
+ def _bisearch (ucs , table , ubound ):
78
107
"""
79
108
Auxiliary function for binary search in interval table.
80
109
@@ -85,7 +114,6 @@ def _bisearch(ucs, table):
85
114
:returns: 1 if ordinal value ucs is found within lookup table, else 0.
86
115
"""
87
116
lbound = 0
88
- ubound = len (table ) - 1
89
117
90
118
if ucs < table [0 ][0 ] or ucs > table [ubound ][1 ]:
91
119
return 0
@@ -101,7 +129,7 @@ def _bisearch(ucs, table):
101
129
return 0
102
130
103
131
104
- def wcwidth (wc ):
132
+ def wcwidth (wc ): # pylint: disable=invalid-name
105
133
r"""
106
134
Given one unicode character, return its printable length on a terminal.
107
135
@@ -152,34 +180,20 @@ def wcwidth(wc):
152
180
Full-width (F) category as defined in Unicode Technical
153
181
Report #11 have a column width of 2.
154
182
"""
155
- # pylint: disable=C0103
156
- # Invalid argument name "wc"
157
183
ucs = ord (wc )
158
184
159
- # NOTE: created by hand, there isn't anything identifiable other than
160
- # general Cf category code to identify these, and some characters in Cf
161
- # category code are of non-zero width.
162
-
163
- # pylint: disable=too-many-boolean-expressions
164
- # Too many boolean expressions in if statement (7/5)
165
- if (ucs == 0 or
166
- ucs == 0x034F or
167
- 0x200B <= ucs <= 0x200F or
168
- ucs == 0x2028 or
169
- ucs == 0x2029 or
170
- 0x202A <= ucs <= 0x202E or
171
- 0x2060 <= ucs <= 0x2063 ):
185
+ if ucs in ZERO_WIDTH_CF :
172
186
return 0
173
187
174
188
# C0/C1 control characters
175
189
if ucs < 32 or 0x07F <= ucs < 0x0A0 :
176
190
return - 1
177
191
178
192
# combining characters with zero width
179
- if _bisearch (ucs , ZERO_WIDTH ):
193
+ if _bisearch (ucs , ZERO_WIDTH , UBOUND_ZERO_WIDTH ):
180
194
return 0
181
195
182
- return 1 + _bisearch (ucs , WIDE_EASTASIAN )
196
+ return 1 + _bisearch (ucs , WIDE_EASTASIAN , UBOUND_WIDE_EASTASIAN )
183
197
184
198
185
199
def wcswidth (pwcs , n = None ):
@@ -202,6 +216,5 @@ def wcswidth(pwcs, n=None):
202
216
wcw = wcwidth (char )
203
217
if wcw < 0 :
204
218
return - 1
205
- else :
206
- width += wcw
219
+ width += wcw
207
220
return width
0 commit comments