Skip to content

Commit fa670a5

Browse files
[3.11] gh-74668: Fix support of bytes in urllib.parse.parse_qsl() (GH-115771) (GH-116367)
urllib.parse functions parse_qs() and parse_qsl() now support bytes arguments containing raw and percent-encoded non-ASCII data.

(cherry picked from commit bdba8ef)

Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent c75df4b commit fa670a5

File tree

3 files changed

+64
-26
lines changed

3 files changed

+64
-26
lines changed

Lib/test/test_urlparse.py

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
("=a", [('', 'a')]),
2020
("a", [('a', '')]),
2121
("a=", [('a', '')]),
22+
("a=b=c", [('a', 'b=c')]),
23+
("a%3Db=c", [('a=b', 'c')]),
24+
("a=b&c=d", [('a', 'b'), ('c', 'd')]),
25+
("a=b%26c=d", [('a', 'b&c=d')]),
2226
("&a=b", [('a', 'b')]),
2327
("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
2428
("a=1&a=2", [('a', '1'), ('a', '2')]),
@@ -29,13 +33,25 @@
2933
(b"=a", [(b'', b'a')]),
3034
(b"a", [(b'a', b'')]),
3135
(b"a=", [(b'a', b'')]),
36+
(b"a=b=c", [(b'a', b'b=c')]),
37+
(b"a%3Db=c", [(b'a=b', b'c')]),
38+
(b"a=b&c=d", [(b'a', b'b'), (b'c', b'd')]),
39+
(b"a=b%26c=d", [(b'a', b'b&c=d')]),
3240
(b"&a=b", [(b'a', b'b')]),
3341
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
3442
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
3543
(";a=b", [(';a', 'b')]),
3644
("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
3745
(b";a=b", [(b';a', b'b')]),
3846
(b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
47+
48+
("\u0141=\xE9", [('\u0141', '\xE9')]),
49+
("%C5%81=%C3%A9", [('\u0141', '\xE9')]),
50+
("%81=%A9", [('\ufffd', '\ufffd')]),
51+
(b"\xc5\x81=\xc3\xa9", [(b'\xc5\x81', b'\xc3\xa9')]),
52+
(b"%C5%81=%C3%A9", [(b'\xc5\x81', b'\xc3\xa9')]),
53+
(b"\x81=\xA9", [(b'\x81', b'\xa9')]),
54+
(b"%81=%A9", [(b'\x81', b'\xa9')]),
3955
]
4056

4157
# Each parse_qs testcase is a two-tuple that contains
@@ -49,6 +65,10 @@
4965
("=a", {'': ['a']}),
5066
("a", {'a': ['']}),
5167
("a=", {'a': ['']}),
68+
("a=b=c", {'a': ['b=c']}),
69+
("a%3Db=c", {'a=b': ['c']}),
70+
("a=b&c=d", {'a': ['b'], 'c': ['d']}),
71+
("a=b%26c=d", {'a': ['b&c=d']}),
5272
("&a=b", {'a': ['b']}),
5373
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
5474
("a=1&a=2", {'a': ['1', '2']}),
@@ -59,13 +79,26 @@
5979
(b"=a", {b'': [b'a']}),
6080
(b"a", {b'a': [b'']}),
6181
(b"a=", {b'a': [b'']}),
82+
(b"a=b=c", {b'a': [b'b=c']}),
83+
(b"a%3Db=c", {b'a=b': [b'c']}),
84+
(b"a=b&c=d", {b'a': [b'b'], b'c': [b'd']}),
85+
(b"a=b%26c=d", {b'a': [b'b&c=d']}),
6286
(b"&a=b", {b'a': [b'b']}),
6387
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
6488
(b"a=1&a=2", {b'a': [b'1', b'2']}),
6589
(";a=b", {';a': ['b']}),
6690
("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
6791
(b";a=b", {b';a': [b'b']}),
6892
(b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
93+
(b"a=a%E2%80%99b", {b'a': [b'a\xe2\x80\x99b']}),
94+
95+
("\u0141=\xE9", {'\u0141': ['\xE9']}),
96+
("%C5%81=%C3%A9", {'\u0141': ['\xE9']}),
97+
("%81=%A9", {'\ufffd': ['\ufffd']}),
98+
(b"\xc5\x81=\xc3\xa9", {b'\xc5\x81': [b'\xc3\xa9']}),
99+
(b"%C5%81=%C3%A9", {b'\xc5\x81': [b'\xc3\xa9']}),
100+
(b"\x81=\xA9", {b'\x81': [b'\xa9']}),
101+
(b"%81=%A9", {b'\x81': [b'\xa9']}),
69102
]
70103

71104
class UrlParseTestCase(unittest.TestCase):
@@ -990,8 +1023,8 @@ def test_parse_qsl_encoding(self):
9901023

9911024
def test_parse_qsl_max_num_fields(self):
9921025
with self.assertRaises(ValueError):
993-
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
994-
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
1026+
urllib.parse.parse_qsl('&'.join(['a=a']*11), max_num_fields=10)
1027+
urllib.parse.parse_qsl('&'.join(['a=a']*10), max_num_fields=10)
9951028

9961029
def test_parse_qs_separator(self):
9971030
parse_qs_semicolon_cases = [

Lib/urllib/parse.py

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -755,42 +755,44 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
755755
756756
Returns a list, as G-d intended.
757757
"""
758-
qs, _coerce_result = _coerce_args(qs)
759-
separator, _ = _coerce_args(separator)
760758

761-
if not separator or (not isinstance(separator, (str, bytes))):
759+
if not separator or not isinstance(separator, (str, bytes)):
762760
raise ValueError("Separator must be of type string or bytes.")
761+
if isinstance(qs, str):
762+
if not isinstance(separator, str):
763+
separator = str(separator, 'ascii')
764+
eq = '='
765+
def _unquote(s):
766+
return unquote_plus(s, encoding=encoding, errors=errors)
767+
else:
768+
qs = bytes(qs)
769+
if isinstance(separator, str):
770+
separator = bytes(separator, 'ascii')
771+
eq = b'='
772+
def _unquote(s):
773+
return unquote_to_bytes(s.replace(b'+', b' '))
774+
775+
if not qs:
776+
return []
763777

764778
# If max_num_fields is defined then check that the number of fields
765779
# is less than max_num_fields. This prevents a memory exhaustion DOS
766780
# attack via post bodies with many fields.
767781
if max_num_fields is not None:
768-
num_fields = 1 + qs.count(separator) if qs else 0
782+
num_fields = 1 + qs.count(separator)
769783
if max_num_fields < num_fields:
770784
raise ValueError('Max number of fields exceeded')
771785

772786
r = []
773-
query_args = qs.split(separator) if qs else []
774-
for name_value in query_args:
775-
if not name_value and not strict_parsing:
776-
continue
777-
nv = name_value.split('=', 1)
778-
if len(nv) != 2:
779-
if strict_parsing:
787+
for name_value in qs.split(separator):
788+
if name_value or strict_parsing:
789+
name, has_eq, value = name_value.partition(eq)
790+
if not has_eq and strict_parsing:
780791
raise ValueError("bad query field: %r" % (name_value,))
781-
# Handle case of a control-name with no equal sign
782-
if keep_blank_values:
783-
nv.append('')
784-
else:
785-
continue
786-
if len(nv[1]) or keep_blank_values:
787-
name = nv[0].replace('+', ' ')
788-
name = unquote(name, encoding=encoding, errors=errors)
789-
name = _coerce_result(name)
790-
value = nv[1].replace('+', ' ')
791-
value = unquote(value, encoding=encoding, errors=errors)
792-
value = _coerce_result(value)
793-
r.append((name, value))
792+
if value or keep_blank_values:
793+
name = _unquote(name)
794+
value = _unquote(value)
795+
r.append((name, value))
794796
return r
795797

796798
def unquote_plus(string, encoding='utf-8', errors='replace'):
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
:mod:`urllib.parse` functions :func:`~urllib.parse.parse_qs` and
2+
:func:`~urllib.parse.parse_qsl` now support bytes arguments containing raw
3+
and percent-encoded non-ASCII data.

0 commit comments

Comments
 (0)