Skip to content

Commit 93730b7

Browse files
committed
fix: 🐛 only use & as a query string separator
bpo-42967: [security] urllib.parse.parse_qsl(): Web cache poisoning - `;` as a query args separator
1 parent 2f12a1b commit 93730b7

File tree

3 files changed

+46
-32
lines changed

3 files changed

+46
-32
lines changed

Lib/test/test_cgi.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,9 @@ def do_test(buf, method):
5353
("", ValueError("bad query field: ''")),
5454
("&", ValueError("bad query field: ''")),
5555
("&&", ValueError("bad query field: ''")),
56-
(";", ValueError("bad query field: ''")),
57-
(";&;", ValueError("bad query field: ''")),
5856
# Should the next few really be valid?
5957
("=", {}),
6058
("=&=", {}),
61-
("=;=", {}),
6259
# This rest seem to make sense
6360
("=a", {'': ['a']}),
6461
("&=a", ValueError("bad query field: ''")),
@@ -73,8 +70,6 @@ def do_test(buf, method):
7370
("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
7471
("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
7572
("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
76-
("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
77-
("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
7873
("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
7974
{'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
8075
'cuyer': ['r'],

Lib/test/test_urlparse.py

Lines changed: 35 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,6 @@
3232
(b"&a=b", [(b'a', b'b')]),
3333
(b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
3434
(b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
35-
(";", []),
36-
(";;", []),
37-
(";a=b", [('a', 'b')]),
38-
("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
39-
("a=1;a=2", [('a', '1'), ('a', '2')]),
40-
(b";", []),
41-
(b";;", []),
42-
(b";a=b", [(b'a', b'b')]),
43-
(b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
44-
(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
4535
]
4636

4737
# Each parse_qs testcase is a two-tuple that contains
@@ -68,16 +58,6 @@
6858
(b"&a=b", {b'a': [b'b']}),
6959
(b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
7060
(b"a=1&a=2", {b'a': [b'1', b'2']}),
71-
(";", {}),
72-
(";;", {}),
73-
(";a=b", {'a': ['b']}),
74-
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
75-
("a=1;a=2", {'a': ['1', '2']}),
76-
(b";", {}),
77-
(b";;", {}),
78-
(b";a=b", {b'a': [b'b']}),
79-
(b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
80-
(b"a=1;a=2", {b'a': [b'1', b'2']}),
8161
]
8262

8363
class UrlParseTestCase(unittest.TestCase):
@@ -886,10 +866,43 @@ def test_parse_qsl_encoding(self):
886866
def test_parse_qsl_max_num_fields(self):
887867
with self.assertRaises(ValueError):
888868
urllib.parse.parse_qs('&'.join(['a=a']*11), max_num_fields=10)
889-
with self.assertRaises(ValueError):
890-
urllib.parse.parse_qs(';'.join(['a=a']*11), max_num_fields=10)
891869
urllib.parse.parse_qs('&'.join(['a=a']*10), max_num_fields=10)
892870

871+
def test_parse_qs_separator(self):
872+
semicolon_cases = [
873+
(";", {}),
874+
(";;", {}),
875+
(";a=b", {'a': ['b']}),
876+
("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
877+
("a=1;a=2", {'a': ['1', '2']}),
878+
(b";", {}),
879+
(b";;", {}),
880+
(b";a=b", {b'a': [b'b']}),
881+
(b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
882+
(b"a=1;a=2", {b'a': [b'1', b'2']}),
883+
]
884+
for orig, expect in semicolon_cases:
885+
result = urllib.parse.parse_qs(orig, separator=';')
886+
self.assertEqual(result, expect, "Error parsing %r" % orig)
887+
888+
889+
def test_parse_qsl_separator(self):
890+
semicolon_cases = [
891+
(";", []),
892+
(";;", []),
893+
(";a=b", [('a', 'b')]),
894+
("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
895+
("a=1;a=2", [('a', '1'), ('a', '2')]),
896+
(b";", []),
897+
(b";;", []),
898+
(b";a=b", [(b'a', b'b')]),
899+
(b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
900+
(b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
901+
]
902+
for orig, expect in semicolon_cases:
903+
result = urllib.parse.parse_qsl(orig, separator=';')
904+
self.assertEqual(result, expect, "Error parsing %r" % orig)
905+
893906
def test_urlencode_sequences(self):
894907
# Other tests incidentally urlencode things; test non-covered cases:
895908
# Sequence and object values.

Lib/urllib/parse.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
662662

663663

664664
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
665-
encoding='utf-8', errors='replace', max_num_fields=None):
665+
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
666666
"""Parse a query given as a string argument.
667667
668668
Arguments:
@@ -686,12 +686,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
686686
max_num_fields: int. If set, then throws a ValueError if there
687687
are more than n fields read by parse_qsl().
688688
689+
separator: str. The symbol to use for separating the query arguments.
690+
Defaults to &.
691+
689692
Returns a dictionary.
690693
"""
691694
parsed_result = {}
692695
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
693696
encoding=encoding, errors=errors,
694-
max_num_fields=max_num_fields)
697+
max_num_fields=max_num_fields, separator=separator)
695698
for name, value in pairs:
696699
if name in parsed_result:
697700
parsed_result[name].append(value)
@@ -701,7 +704,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
701704

702705

703706
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
704-
encoding='utf-8', errors='replace', max_num_fields=None):
707+
encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
705708
"""Parse a query given as a string argument.
706709
707710
Arguments:
@@ -724,6 +727,9 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
724727
max_num_fields: int. If set, then throws a ValueError
725728
if there are more than n fields read by parse_qsl().
726729
730+
separator: str. The symbol to use for separating the query arguments.
731+
Defaults to &.
732+
727733
Returns a list, as G-d intended.
728734
"""
729735
qs, _coerce_result = _coerce_args(qs)
@@ -732,11 +738,11 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
732738
# is less than max_num_fields. This prevents a memory exhaustion DOS
733739
# attack via post bodies with many fields.
734740
if max_num_fields is not None:
735-
num_fields = 1 + qs.count('&') + qs.count(';')
741+
num_fields = 1 + qs.count(separator)
736742
if max_num_fields < num_fields:
737743
raise ValueError('Max number of fields exceeded')
738744

739-
pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
745+
pairs = [s1 for s1 in qs.split(separator)]
740746
r = []
741747
for name_value in pairs:
742748
if not name_value and not strict_parsing:

0 commit comments

Comments
 (0)