Skip to content

gh-56166: Deprecate passing counts and maxsplit via position in re #107767

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -886,7 +886,7 @@ Functions
.. versionadded:: 3.4


.. function:: split(pattern, string, maxsplit=0, flags=0)
.. function:: split(pattern, string, \*, maxsplit=0, flags=0)

Split *string* by the occurrences of *pattern*. If capturing parentheses are
used in *pattern*, then the text of all groups in the pattern are also returned
Expand All @@ -898,7 +898,7 @@ Functions
['Words', 'words', 'words', '']
>>> re.split(r'(\W+)', 'Words, words, words.')
['Words', ', ', 'words', ', ', 'words', '.', '']
>>> re.split(r'\W+', 'Words, words, words.', 1)
>>> re.split(r'\W+', 'Words, words, words.', maxsplit=1)
['Words', 'words, words.']
>>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE)
['0', '3', '9']
Expand Down Expand Up @@ -963,7 +963,7 @@ Functions
Non-empty matches can now start just after a previous empty match.


.. function:: sub(pattern, repl, string, count=0, flags=0)
.. function:: sub(pattern, repl, string, \*, count=0, flags=0)

Return the string obtained by replacing the leftmost non-overlapping occurrences
of *pattern* in *string* by the replacement *repl*. If the pattern isn't found,
Expand Down Expand Up @@ -1038,7 +1038,7 @@ Functions
in the ASCII range (``b'\x00'``-``b'\x7f'``).


.. function:: subn(pattern, repl, string, count=0, flags=0)
.. function:: subn(pattern, repl, string, \*, count=0, flags=0)

Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.
Expand Down
2 changes: 1 addition & 1 deletion Lib/idlelib/idle_test/test_calltip.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ class SB: __call__ = None

p = re.compile('')
tiptest(re.sub, '''\
(pattern, repl, string, count=0, flags=0)
(pattern, repl, string, *_args, count=0, flags=0)
Return the string obtained by replacing the leftmost
non-overlapping occurrences of the pattern in string by the
replacement repl. repl can be either a string or a callable;
Expand Down
60 changes: 57 additions & 3 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ class RegexFlag:
# sre exception
error = _compiler.error

class _ZeroSentinel(int): pass
_zero_sentinel = _ZeroSentinel()

# --------------------------------------------------------------------
# public interface

Expand All @@ -175,16 +178,33 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)

def sub(pattern, repl, string, count=0, flags=0):
def sub(pattern, repl, string, *_args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl. repl can be either a string or a callable;
    if a string, backslash escapes in it are processed. If it is
    a callable, it's passed the Match object and must return
    a replacement string to be used."""
    # *_args exists only to keep the deprecated positional spelling of
    # count/flags working.  The sentinel defaults let us tell "keyword not
    # passed" apart from an explicit 0, so a value given both positionally
    # and by keyword is rejected with TypeError.
    if _args:
        if count is not _zero_sentinel:
            raise TypeError("sub() got multiple values for argument 'count'")
        count, *_args = _args
        if _args:
            if flags is not _zero_sentinel:
                raise TypeError("sub() got multiple values for argument 'flags'")
            flags, *_args = _args
            if _args:
                # pattern, repl, string, count and flags are already bound,
                # so the caller supplied five positionals plus the leftovers.
                raise TypeError("sub() takes from 3 to 5 positional arguments "
                                f"but {5 + len(_args)} were given")

        # Imported lazily so the keyword-only path pays nothing for it.
        import warnings
        warnings.warn(
            "'count' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )
    return _compile(pattern, flags).sub(repl, string, count)

def subn(pattern, repl, string, count=0, flags=0):
def subn(pattern, repl, string, *_args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return a 2-tuple containing (new_string, number).
    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made. repl can be either a string or a
    callable; if a string, backslash escapes in it are processed.
    If it is a callable, it's passed the Match object and must
    return a replacement string to be used."""
    # *_args exists only to keep the deprecated positional spelling of
    # count/flags working.  The sentinel defaults let us tell "keyword not
    # passed" apart from an explicit 0, so a value given both positionally
    # and by keyword is rejected with TypeError.
    if _args:
        if count is not _zero_sentinel:
            raise TypeError("subn() got multiple values for argument 'count'")
        count, *_args = _args
        if _args:
            if flags is not _zero_sentinel:
                raise TypeError("subn() got multiple values for argument 'flags'")
            flags, *_args = _args
            if _args:
                # pattern, repl, string, count and flags are already bound,
                # so the caller supplied five positionals plus the leftovers.
                raise TypeError("subn() takes from 3 to 5 positional arguments "
                                f"but {5 + len(_args)} were given")

        # Imported lazily so the keyword-only path pays nothing for it.
        import warnings
        warnings.warn(
            "'count' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )
    return _compile(pattern, flags).subn(repl, string, count)

def split(pattern, string, maxsplit=0, flags=0):
def split(pattern, string, *_args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings. If
    capturing parentheses are used in pattern, then the text of all
    groups in the pattern are also returned as part of the resulting
    list. If maxsplit is nonzero, at most maxsplit splits occur,
    and the remainder of the string is returned as the final element
    of the list."""
    # Fast path: nothing was passed positionally after *string*.
    if not _args:
        return _compile(pattern, flags).split(string, maxsplit)

    # Deprecated path: the first extra positional is maxsplit, the second is
    # flags.  A keyword that is no longer the sentinel was also given by
    # name, which is a duplicate.
    if maxsplit is not _zero_sentinel:
        raise TypeError("split() got multiple values for argument 'maxsplit'")
    maxsplit, *leftover = _args
    if leftover:
        if flags is not _zero_sentinel:
            raise TypeError("split() got multiple values for argument 'flags'")
        flags, *leftover = leftover
        if leftover:
            raise TypeError("split() takes from 2 to 4 positional arguments "
                            f"but {4 + len(leftover)} were given")

    import warnings
    warnings.warn(
        "'maxsplit' is passed as positional argument",
        DeprecationWarning, stacklevel=2
    )
    return _compile(pattern, flags).split(string, maxsplit)

def findall(pattern, string, flags=0):
Expand Down
32 changes: 27 additions & 5 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,9 @@ def test_basic_re_sub(self):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')

Expand Down Expand Up @@ -235,8 +236,23 @@ def test_sub_template_numeric_escape(self):

def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')
self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, count=1)
self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, 0, flags=0)
self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, 0, 0)

def test_misuse_flags(self):
    # A flag passed right after *string* lands in the count/maxsplit slot,
    # not in flags.  Each call must emit a DeprecationWarning and give the
    # same result as passing int(re.I) through the matching keyword.
    cases = (
        (re.sub, ('a', 'b', 'aaaaa'), 'count'),
        (re.subn, ("b*", "x", "xyz"), 'count'),
        (re.split, (":", ":a:b::c"), 'maxsplit'),
    )
    for func, args, keyword in cases:
        with self.assertWarns(DeprecationWarning):
            result = func(*args, re.I)
        self.assertEqual(result, func(*args, **{keyword: int(re.I)}))

def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
Expand Down Expand Up @@ -344,9 +360,14 @@ def test_re_subn(self):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))

self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, count=1)
self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, flags=0)
self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, 0)

def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
Expand Down Expand Up @@ -401,7 +422,8 @@ def test_re_split(self):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
with self.assertWarns(DeprecationWarning):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Deprecate passing the optional *count*, *maxsplit* and *flags* arguments by position to :func:`re.sub`, :func:`re.subn` and :func:`re.split`; pass them as keyword arguments instead.