|
34 | 34 | except UnicodeEncodeError:
|
35 | 35 | raise unittest.SkipTest("filename is not encodable to utf8")
|
36 | 36 | SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
|
| 37 | +UTF8_BUG_XMLFILE = findfile("expat224_utf8_bug.xml", subdir="xmltestdata") |
37 | 38 |
|
38 | 39 | SAMPLE_XML = """\
|
39 | 40 | <body>
|
@@ -1739,6 +1740,37 @@ def __eq__(self, other):
|
1739 | 1740 | self.assertIsInstance(e[0].tag, str)
|
1740 | 1741 | self.assertEqual(e[0].tag, 'changed')
|
1741 | 1742 |
|
def check_expat224_utf8_bug(self, text):
    """Parse ``<a b="..."/>`` built around *text* and check attribute 'b'.

    *text* is a bytes object spliced verbatim into the attribute value of
    a one-element document.  After parsing with ET.XML(), the 'b'
    attribute must equal *text* decoded as UTF-8.
    """
    document = b'<a b="%s"/>' % text
    element = ET.XML(document)
    # Decode only after parsing, so that a parser failure is reported
    # before any decoding problem with the input.
    parsed_value = element.get('b')
    self.assertEqual(parsed_value, text.decode('utf-8'))
| 1747 | + |
def test_expat224_utf8_bug(self):
    """Run check_expat224_utf8_bug() on two long non-ASCII payloads."""
    # bpo-31170: Expat 2.2.3 had a bug in its UTF-8 decoder.
    # Check that Expat 2.2.4 fixed the bug.
    #
    # Test buffer bounds at odd and even positions.
    two_byte_char = b'\xc3\xa0'
    payloads = (
        two_byte_char * 1024,
        # Same data behind a one-byte prefix, which shifts every
        # two-byte sequence by one position.
        b'x' + two_byte_char * 1024,
    )
    for payload in payloads:
        self.check_expat224_utf8_bug(payload)
| 1759 | + |
def test_expat224_utf8_bug_file(self):
    """Check the 'b' attribute parsed from the bpo-31170 sample file.

    The file named by UTF8_BUG_XMLFILE is read as bytes and parsed with
    ET.fromstring().  The 'b' attribute of the root element must equal
    the value extracted by hand from the raw file content.
    """
    with open(UTF8_BUG_XMLFILE, 'rb') as fp:
        raw = fp.read()
    root = ET.fromstring(raw)
    xmlattr = root.get('b')

    # "Parse" manually the XML file to extract the value of the 'b'
    # attribute of the <a b='xxx' /> XML element
    text = raw.decode('utf-8').strip()
    # Mimic XML attribute-value normalization: a line break (CRLF, lone
    # CR or lone LF) or a tab inside an attribute value is reported by
    # the parser as a single space.  CRLF is replaced first so that it
    # yields one space rather than two.  Handling all three line-ending
    # forms keeps the test independent of how the data file was checked
    # out.
    text = text.replace('\r\n', ' ')
    for whitespace in '\r\n\t':
        text = text.replace(whitespace, ' ')
    # Drop the 6 leading characters (the "<a b='" prefix) and the 4
    # trailing characters (the "' />" suffix); this assumes the file
    # holds exactly one element of that shape.
    text = text[6:-4]
    self.assertEqual(xmlattr, text)
| 1772 | + |
| 1773 | + |
1742 | 1774 |
|
1743 | 1775 | # --------------------------------------------------------------------
|
1744 | 1776 |
|
|
0 commit comments