Skip to content

Commit a24df39

Browse files
authored
improve striptags performance (#413)
2 parents 4c397ef + 750e22b commit a24df39

File tree

3 files changed

+39
-9
lines changed

3 files changed

+39
-9
lines changed

CHANGES.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ Version 2.1.4
33

44
Unreleased
55

6+
- Don't use regular expressions for ``striptags``, avoiding a performance
7+
issue. :pr:`413`
8+
69

710
Version 2.1.3
811
-------------

src/markupsafe/__init__.py

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import functools
2-
import re
32
import string
43
import sys
54
import typing as t
@@ -16,9 +15,6 @@ def __html__(self) -> str:
1615

1716
__version__ = "2.1.4.dev"
1817

19-
_strip_comments_re = re.compile(r"<!--.*?-->", re.DOTALL)
20-
_strip_tags_re = re.compile(r"<.*?>", re.DOTALL)
21-
2218

2319
def _simple_escaping_wrapper(func: "t.Callable[_P, str]") -> "t.Callable[_P, Markup]":
2420
@functools.wraps(func)
@@ -162,10 +158,41 @@ def striptags(self) -> str:
162158
>>> Markup("Main &raquo;\t<em>About</em>").striptags()
163159
'Main » About'
164160
"""
165-
# Use two regexes to avoid ambiguous matches.
166-
value = _strip_comments_re.sub("", self)
167-
value = _strip_tags_re.sub("", value)
168-
value = " ".join(value.split())
161+
# collapse each run of whitespace into a single space and trim the ends
162+
value = " ".join(self.split())
163+
164+
# Look for comments then tags separately. Otherwise, a comment that
165+
# contains a tag would end early, leaving some of the comment behind.
166+
167+
while True:
168+
# keep finding comment start marks
169+
start = value.find("<!--")
170+
171+
if start == -1:
172+
break
173+
174+
# find a comment end mark beyond the start, otherwise stop
175+
end = value.find("-->", start)
176+
177+
if end == -1:
178+
break
179+
180+
value = f"{value[:start]}{value[end + 3:]}"
181+
182+
# remove tags using the same method
183+
while True:
184+
start = value.find("<")
185+
186+
if start == -1:
187+
break
188+
189+
end = value.find(">", start)
190+
191+
if end == -1:
192+
break
193+
194+
value = f"{value[:start]}{value[end + 1:]}"
195+
169196
return self.__class__(value).unescape()
170197

171198
@classmethod

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tox]
22
envlist =
3-
py3{12,11,10,9,8,7}
3+
py3{12,11,10,9,8}
44
pypy310
55
style
66
typing

0 commit comments

Comments
 (0)