107 changes: 53 additions & 54 deletions Lib/re/__init__.py
@@ -164,17 +164,17 @@ class RegexFlag:
def match(pattern, string, flags=0):
"""Try to apply the pattern at the start of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).match(string)
return compile(pattern, flags).match(string)

def fullmatch(pattern, string, flags=0):
"""Try to apply the pattern to all of the string, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).fullmatch(string)
return compile(pattern, flags).fullmatch(string)

def search(pattern, string, flags=0):
"""Scan through string looking for a match to the pattern, returning
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)
return compile(pattern, flags).search(string)

class _ZeroSentinel(int):
pass
@@ -205,7 +205,7 @@ def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel
DeprecationWarning, stacklevel=2
)

return _compile(pattern, flags).sub(repl, string, count)
return compile(pattern, flags).sub(repl, string, count)
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
@@ -235,7 +235,7 @@ def subn(pattern, repl, string, *args, count=_zero_sentine
DeprecationWarning, stacklevel=2
)

return _compile(pattern, flags).subn(repl, string, count)
return compile(pattern, flags).subn(repl, string, count)
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
@@ -264,7 +264,7 @@ def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel)
DeprecationWarning, stacklevel=2
)

return _compile(pattern, flags).split(string, maxsplit)
return compile(pattern, flags).split(string, maxsplit)
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'

def findall(pattern, string, flags=0):
@@ -275,60 +275,17 @@ def findall(pattern, string, flags=0):
has more than one group.

Empty matches are included in the result."""
return _compile(pattern, flags).findall(string)
return compile(pattern, flags).findall(string)

def finditer(pattern, string, flags=0):
"""Return an iterator over all non-overlapping matches in the
string. For each match, the iterator returns a Match object.

Empty matches are included in the result."""
return _compile(pattern, flags).finditer(string)
return compile(pattern, flags).finditer(string)

def compile(pattern, flags=0):
"Compile a regular expression pattern, returning a Pattern object."
return _compile(pattern, flags)

def purge():
"Clear the regular expression caches"
_cache.clear()
_cache2.clear()
_compile_template.cache_clear()


# SPECIAL_CHARS
# closing ')', '}' and ']'
# '-' (a range in character set)
# '&', '~', (extended character set operations)
# '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'}

def escape(pattern):
"""
Escape special characters in a string.
"""
if isinstance(pattern, str):
return pattern.translate(_special_chars_map)
else:
pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1')

Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))

# --------------------------------------------------------------------
# internals

# Use the fact that dict keeps the insertion order.
# _cache2 uses the simple FIFO policy which has better latency.
# _cache uses the LRU policy which has better hit rate.
_cache = {} # LRU
_cache2 = {} # FIFO
_MAXCACHE = 512
_MAXCACHE2 = 256
assert _MAXCACHE2 < _MAXCACHE

def _compile(pattern, flags):
# internal: compile pattern
"""Compile a regular expression pattern, returning a Pattern object."""
if isinstance(flags, RegexFlag):
flags = flags.value
try:
@@ -371,6 +328,45 @@ def _compile(pattern, flags):
_cache2[key] = p
return p

def purge():
"Clear the regular expression caches"
_cache.clear()
_cache2.clear()
_compile_template.cache_clear()


# SPECIAL_CHARS
# closing ')', '}' and ']'
# '-' (a range in character set)
# '&', '~', (extended character set operations)
# '#' (comment) and WHITESPACE (ignored) in verbose mode
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f'}

def escape(pattern):
"""
Escape special characters in a string.
"""
if isinstance(pattern, str):
return pattern.translate(_special_chars_map)
else:
pattern = str(pattern, 'latin1')
return pattern.translate(_special_chars_map).encode('latin1')

Pattern = type(_compiler.compile('', 0))
Match = type(_compiler.compile('', 0).match(''))

# --------------------------------------------------------------------
# internals

# Use the fact that dict keeps the insertion order.
# _cache2 uses the simple FIFO policy which has better latency.
# _cache uses the LRU policy which has better hit rate.
_cache = {} # LRU
_cache2 = {} # FIFO
_MAXCACHE = 512
_MAXCACHE2 = 256
assert _MAXCACHE2 < _MAXCACHE

@functools.lru_cache(_MAXCACHE)
def _compile_template(pattern, repl):
# internal: compile replacement pattern
@@ -381,9 +377,12 @@ def _compile_template(pattern, repl):
import copyreg

def _pickle(p):
return _compile, (p.pattern, p.flags)
return compile, (p.pattern, p.flags)

# compatibility alias to deserialize old pickles
_compile = compile

copyreg.pickle(Pattern, _pickle, _compile)
copyreg.pickle(Pattern, _pickle, compile)

# --------------------------------------------------------------------
# experimental stuff (see python-dev discussions for details)
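Aside on the caching code visible above: the _cache/_cache2 comments describe a two-level scheme, a small FIFO dict consulted first (better latency, since a hit is a single plain lookup) in front of a larger LRU dict (better hit rate). The standalone sketch below only illustrates that idea and is not the stdlib code; the dict and constant names mirror the diff, while the cached_compile wrapper and its real_compile parameter are hypothetical.

_MAXCACHE = 512     # LRU capacity (mirrors the constant above)
_MAXCACHE2 = 256    # FIFO capacity
_cache = {}         # LRU: a hit is popped and re-inserted at the end
_cache2 = {}        # FIFO: the oldest insertion is evicted first

def cached_compile(pattern, flags, real_compile):
    key = (type(pattern), pattern, flags)
    try:
        return _cache2[key]                 # fast path: one plain dict lookup, no reordering
    except KeyError:
        pass
    p = _cache.pop(key, None)               # LRU bookkeeping: pop + re-insert moves the key to the end
    if p is None:
        p = real_compile(pattern, flags)    # miss in both levels: actually compile
        if len(_cache) >= _MAXCACHE:
            del _cache[next(iter(_cache))]  # evict the least recently used entry
    _cache[key] = p
    if len(_cache2) >= _MAXCACHE2:
        del _cache2[next(iter(_cache2))]    # evict the oldest FIFO entry
    _cache2[key] = p
    return p

Because both levels are plain dicts that preserve insertion order, eviction is just deleting the first key; the FIFO front stays cheap precisely because it never reorders entries on a hit.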
2 changes: 1 addition & 1 deletion Lib/test/test_re.py
@@ -1227,7 +1227,7 @@ def test_pickling(self):
pickled = pickle.dumps(oldpat, proto)
newpat = pickle.loads(pickled)
self.assertEqual(newpat, oldpat)
# current pickle expects the _compile() reconstructor in re module
# previous pickles may expect the _compile() reconstructor in re module
Member:

Is it worth adding a test for pickles that expect _compile()? ("No" is a valid answer)

Contributor Author:

I've added it in df79dd5

from re import _compile # noqa: F401

def test_copying(self):
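To make the review exchange above concrete: pickling a compiled pattern stores a reconstructor plus (pattern, flags), as the _pickle helper in the first file shows, so pickles produced before this change name re._compile and only load while that attribute still resolves, which is what the compatibility alias and the added "from re import _compile" check guard. A minimal round-trip sketch, using only the public pickle/re APIs:

import pickle
import re

p = re.compile(r"\d+", re.IGNORECASE)
data = pickle.dumps(p)      # serialized as: reconstructor + (p.pattern, p.flags)
q = pickle.loads(data)      # loading re-runs the reconstructor, i.e. recompiles
assert q.pattern == p.pattern and q.flags == p.flags

# Older pickles refer to the reconstructor as re._compile, so after this
# change the alias has to keep that name importable:
assert re._compile is re.compile

New pickles, by contrast, reference re.compile directly, as the updated _pickle return value and the copyreg.pickle registration show.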
@@ -0,0 +1 @@
Remove re._compile, leaving a compatibility alias to re.compile.