|
30 | 30 |
|
# Paths of the XML fixtures, all resolved through findfile() with
# subdir="xmltestdata".  The names are resolved in the same order as the
# targets on the left-hand side.
SIMPLE_XMLFILE, SIMPLE_NS_XMLFILE, UTF8_BUG_XMLFILE = (
    findfile(fixture_name, subdir="xmltestdata")
    for fixture_name in (
        "simple.xml",
        "simple-ns.xml",
        "expat224_utf8_bug.xml",
    )
)
33 | 34 |
|
34 | 35 | SAMPLE_XML = """\
|
35 | 36 | <body>
|
@@ -1494,6 +1495,36 @@ def test_issue10777(self):
|
1494 | 1495 | ET.register_namespace('test10777', 'http://myuri/')
|
1495 | 1496 | ET.register_namespace('test10777', 'http://myuri/')
|
1496 | 1497 |
|
def check_expat224_utf8_bug(self, text):
    """Check that UTF-8 bytes survive a round trip through an XML attribute.

    *text* is a bytes object; it is embedded as the value of attribute
    ``b`` of an ``<a/>`` element, the document is parsed with ``ET.XML``
    and the parsed attribute must equal *text* decoded as UTF-8.
    """
    document = b'<a b="%s"/>' % text
    attr_value = ET.XML(document).get('b')
    # Decode only after parsing, so a parse error is reported before any
    # decoding error.
    self.assertEqual(attr_value, text.decode('utf-8'))
| 1502 | + |
def test_expat224_utf8_bug(self):
    """Run the UTF-8 attribute round-trip check on two long payloads."""
    # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
    # Check that Expat 2.2.4 fixed the bug.
    #
    # Test buffer bounds at odd and even positions: the optional one-byte
    # prefix shifts every two-byte sequence by one position.
    two_byte_run = b'\xc3\xa0' * 1024
    for prefix in (b'', b'x'):
        self.check_expat224_utf8_bug(prefix + two_byte_run)
| 1514 | + |
def test_expat224_utf8_bug_file(self):
    """Check attribute ``b`` parsed from UTF8_BUG_XMLFILE against the raw file.

    The fixture is read as bytes and parsed with ``ET.fromstring``.  The
    expected attribute value is then extracted from the raw text by hand
    and compared with what the parser returned.
    """
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # Mirror XML attribute-value normalization: a CRLF pair counts as one
    # line break, and every remaining line break or tab becomes a single
    # space.  Handling all of them keeps the test independent of the line
    # endings the fixture happens to be checked out with.
    text = text.replace('\r\n', ' ')
    text = text.translate(str.maketrans('\r\n\t', '   '))
    # Drop the leading "<a b='" (6 characters) and the trailing "' />"
    # (4 characters).
    text = text[6:-4]
    self.assertEqual(xmlattr, text)
| 1527 | + |
1497 | 1528 |
|
1498 | 1529 | # --------------------------------------------------------------------
|
1499 | 1530 |
|
|
0 commit comments