diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 3f03f0341d8166..e5d7ff08f2ee6d 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -886,7 +886,7 @@ Functions .. versionadded:: 3.4 -.. function:: split(pattern, string, maxsplit=0, flags=0) +.. function:: split(pattern, string, \*, maxsplit=0, flags=0) Split *string* by the occurrences of *pattern*. If capturing parentheses are used in *pattern*, then the text of all groups in the pattern are also returned @@ -898,7 +898,7 @@ Functions ['Words', 'words', 'words', ''] >>> re.split(r'(\W+)', 'Words, words, words.') ['Words', ', ', 'words', ', ', 'words', '.', ''] - >>> re.split(r'\W+', 'Words, words, words.', 1) + >>> re.split(r'\W+', 'Words, words, words.', maxsplit=1) ['Words', 'words, words.'] >>> re.split('[a-f]+', '0a3B9', flags=re.IGNORECASE) ['0', '3', '9'] @@ -963,7 +963,7 @@ Functions Non-empty matches can now start just after a previous empty match. -.. function:: sub(pattern, repl, string, count=0, flags=0) +.. function:: sub(pattern, repl, string, \*, count=0, flags=0) Return the string obtained by replacing the leftmost non-overlapping occurrences of *pattern* in *string* by the replacement *repl*. If the pattern isn't found, @@ -1038,7 +1038,7 @@ Functions in the ASCII range (``b'\x00'``-``b'\x7f'``). -.. function:: subn(pattern, repl, string, count=0, flags=0) +.. function:: subn(pattern, repl, string, \*, count=0, flags=0) Perform the same operation as :func:`sub`, but return a tuple ``(new_string, number_of_subs_made)``. 
diff --git a/Lib/idlelib/idle_test/test_calltip.py b/Lib/idlelib/idle_test/test_calltip.py index 1ccb63b9dbd65f..ea5a609fae006a 100644 --- a/Lib/idlelib/idle_test/test_calltip.py +++ b/Lib/idlelib/idle_test/test_calltip.py @@ -84,7 +84,7 @@ class SB: __call__ = None p = re.compile('') tiptest(re.sub, '''\ -(pattern, repl, string, count=0, flags=0) +(pattern, repl, string, *_args, count=0, flags=0) Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index d6fccd5bc97cc0..a136a5cb9adb8a 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -157,6 +157,9 @@ class RegexFlag: # sre exception error = _compiler.error +class _ZeroSentinel(int): pass +_zero_sentinel = _ZeroSentinel() + # -------------------------------------------------------------------- # public interface @@ -175,16 +178,33 @@ def search(pattern, string, flags=0): a Match object, or None if no match was found.""" return _compile(pattern, flags).search(string) -def sub(pattern, repl, string, count=0, flags=0): +def sub(pattern, repl, string, *_args, count=_zero_sentinel, flags=_zero_sentinel): """Return the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in string by the replacement repl. repl can be either a string or a callable; if a string, backslash escapes in it are processed. 
If it is a callable, it's passed the Match object and must return a replacement string to be used.""" + if _args: + if count is not _zero_sentinel: + raise TypeError("sub() got multiple values for argument 'count'") + count, *_args = _args + if _args: + if flags is not _zero_sentinel: + raise TypeError("sub() got multiple values for argument 'flags'") + flags, *_args = _args + if _args: + raise TypeError("sub() takes from 3 to 5 positional arguments " + f"but {5 + len(_args)} were given") + + import warnings + warnings.warn( + "'count' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) return _compile(pattern, flags).sub(repl, string, count) -def subn(pattern, repl, string, count=0, flags=0): +def subn(pattern, repl, string, *_args, count=_zero_sentinel, flags=_zero_sentinel): """Return a 2-tuple containing (new_string, number). new_string is the string obtained by replacing the leftmost non-overlapping occurrences of the pattern in the source @@ -193,9 +213,26 @@ def subn(pattern, repl, string, count=0, flags=0): callable; if a string, backslash escapes in it are processed. 
If it is a callable, it's passed the Match object and must return a replacement string to be used.""" + if _args: + if count is not _zero_sentinel: + raise TypeError("subn() got multiple values for argument 'count'") + count, *_args = _args + if _args: + if flags is not _zero_sentinel: + raise TypeError("subn() got multiple values for argument 'flags'") + flags, *_args = _args + if _args: + raise TypeError("subn() takes from 3 to 5 positional arguments " + f"but {5 + len(_args)} were given") + + import warnings + warnings.warn( + "'count' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) return _compile(pattern, flags).subn(repl, string, count) -def split(pattern, string, maxsplit=0, flags=0): +def split(pattern, string, *_args, maxsplit=_zero_sentinel, flags=_zero_sentinel): """Split the source string by the occurrences of the pattern, returning a list containing the resulting substrings. If capturing parentheses are used in pattern, then the text of all @@ -203,6 +240,23 @@ def split(pattern, string, maxsplit=0, flags=0): list. 
If maxsplit is nonzero, at most maxsplit splits occur, and the remainder of the string is returned as the final element of the list.""" + if _args: + if maxsplit is not _zero_sentinel: + raise TypeError("split() got multiple values for argument 'maxsplit'") + maxsplit, *_args = _args + if _args: + if flags is not _zero_sentinel: + raise TypeError("split() got multiple values for argument 'flags'") + flags, *_args = _args + if _args: + raise TypeError("split() takes from 2 to 4 positional arguments " + f"but {4 + len(_args)} were given") + + import warnings + warnings.warn( + "'maxsplit' is passed as positional argument", + DeprecationWarning, stacklevel=2 + ) return _compile(pattern, flags).split(string, maxsplit) def findall(pattern, string, flags=0): diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index a6f5af17d7d51b..ed7e0a66bf5c91 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -127,8 +127,9 @@ def test_basic_re_sub(self): self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), '9.3 -3 24x100y') - self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), - '9.3 -3 23x99y') + with self.assertWarns(DeprecationWarning): + self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), + '9.3 -3 23x99y') self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3), '9.3 -3 23x99y') @@ -235,8 +236,23 @@ def test_sub_template_numeric_escape(self): def test_qualified_re_sub(self): self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') - self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') + with self.assertWarns(DeprecationWarning): + self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa') + self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, count=1) + self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, 0, flags=0) + self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 
1, 0, 0) + + def test_misuse_flags(self): + with self.assertWarns(DeprecationWarning): + result = re.sub('a', 'b', 'aaaaa', re.I) + self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I))) + with self.assertWarns(DeprecationWarning): + result = re.subn("b*", "x", "xyz", re.I) + self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I))) + with self.assertWarns(DeprecationWarning): + result = re.split(":", ":a:b::c", re.I) + self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I))) def test_bug_114660(self): self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), @@ -344,9 +360,14 @@ def test_re_subn(self): self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) - self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) + with self.assertWarns(DeprecationWarning): + self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2)) + self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, count=1) + self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, flags=0) + self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, 0) + def test_re_split(self): for string in ":a:b::c", S(":a:b::c"): self.assertTypedEqual(re.split(":", string), @@ -401,7 +422,8 @@ def test_re_split(self): self.assertTypedEqual(re.split(sep, ':a:b::c'), expected) def test_qualified_re_split(self): - self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) + with self.assertWarns(DeprecationWarning): + self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c']) self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d']) self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2), diff --git 
a/Misc/NEWS.d/next/Library/2023-08-08-08-46-12.gh-issue-56166.3LIWOO.rst b/Misc/NEWS.d/next/Library/2023-08-08-08-46-12.gh-issue-56166.3LIWOO.rst new file mode 100644 index 00000000000000..c2717e8bbbb14c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-08-08-46-12.gh-issue-56166.3LIWOO.rst @@ -0,0 +1 @@ +Deprecate passing *count*, *flags* and *maxsplit* as positional arguments to :func:`re.sub`, :func:`re.subn` and :func:`re.split`; they should be passed by keyword instead.