1
1
import functools
2
- import re
3
2
import string
4
3
import sys
5
4
import typing as t
@@ -16,9 +15,6 @@ def __html__(self) -> str:
16
15
17
16
__version__ = "2.1.4.dev"
18
17
19
- _strip_comments_re = re .compile (r"<!--.*?-->" , re .DOTALL )
20
- _strip_tags_re = re .compile (r"<.*?>" , re .DOTALL )
21
-
22
18
23
19
def _simple_escaping_wrapper (func : "t.Callable[_P, str]" ) -> "t.Callable[_P, Markup]" :
24
20
@functools .wraps (func )
@@ -162,10 +158,41 @@ def striptags(self) -> str:
162
158
>>> Markup("Main »\t <em>About</em>").striptags()
163
159
'Main » About'
164
160
"""
165
- # Use two regexes to avoid ambiguous matches.
166
- value = _strip_comments_re .sub ("" , self )
167
- value = _strip_tags_re .sub ("" , value )
168
- value = " " .join (value .split ())
161
+ # collapse spaces
162
+ value = " " .join (self .split ())
163
+
164
+ # Look for comments then tags separately. Otherwise, a comment that
165
+ # contains a tag would end early, leaving some of the comment behind.
166
+
167
+ while True :
168
+ # keep finding comment start marks
169
+ start = value .find ("<!--" )
170
+
171
+ if start == - 1 :
172
+ break
173
+
174
+ # find a comment end mark beyond the start, otherwise stop
175
+ end = value .find ("-->" , start )
176
+
177
+ if end == - 1 :
178
+ break
179
+
180
+ value = f"{ value [:start ]} { value [end + 3 :]} "
181
+
182
+ # remove tags using the same method
183
+ while True :
184
+ start = value .find ("<" )
185
+
186
+ if start == - 1 :
187
+ break
188
+
189
+ end = value .find (">" , start )
190
+
191
+ if end == - 1 :
192
+ break
193
+
194
+ value = f"{ value [:start ]} { value [end + 1 :]} "
195
+
169
196
return self .__class__ (value ).unescape ()
170
197
171
198
@classmethod
0 commit comments