From 11284d0a2cf5e6ec4d32b583218046e6f0e7f150 Mon Sep 17 00:00:00 2001 From: Daniel Hillier Date: Wed, 23 Apr 2025 11:16:03 +1000 Subject: [PATCH 1/3] b132637: Add failing tests for xpath with default ns provided --- Lib/test/test_xml_etree.py | 56 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 5fe9d6884106ad..404a0355096c8e 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3168,6 +3168,47 @@ def test_find_xpath(self): self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]') self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]') + def test_find_xpath_namespaces(self): + LINEAR_XML = ''' + + + + + + ''' + e = ET.XML(LINEAR_XML) + nsmap = {"": "X"} + + # Test for numeric indexing and last() + self.assertEqual( + e.find('./tag[1]', namespaces=nsmap).attrib['class'], 'a', + ) + self.assertEqual( + e.find('./tag[2]', namespaces=nsmap).attrib['class'], 'b', + ) + self.assertEqual( + e.find('./tag[last()]', namespaces=nsmap).attrib['class'], 'd', + ) + self.assertEqual( + e.find('./tag[last()-1]', namespaces=nsmap).attrib['class'], 'c', + ) + self.assertEqual( + e.find('./tag[last()-2]', namespaces=nsmap).attrib['class'], 'b', + ) + + self.assertRaisesRegex( + SyntaxError, 'XPath', e.find, './tag[0]', namespaces=nsmap, + ) + self.assertRaisesRegex( + SyntaxError, 'XPath', e.find, './tag[-1]', namespaces=nsmap, + ) + self.assertRaisesRegex( + SyntaxError, 'XPath', e.find, './tag[last()-0]', namespaces=nsmap, + ) + self.assertRaisesRegex( + SyntaxError, 'XPath', e.find, './tag[last()+1]', namespaces=nsmap, + ) + def test_findall(self): e = ET.XML(SAMPLE_XML) e[2] = ET.XML(SAMPLE_SECTION) @@ -3307,6 +3348,21 @@ def test_findall_different_nsmaps(self): self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1) + def 
test_findall_default_nsmap_position_predicate(self): + root = ET.XML(''' + + + + + + ''') + nsmap = {'': 'default'} + first_b = root[1] + last_b = root[2] + self.assertEqual(len(root.findall(".//b[1]", namespaces=nsmap)), 2) + self.assertEqual(root.findall(".//b[1]", namespaces=nsmap)[0], first_b) + self.assertEqual(root.findall(".//b[last()]", namespaces=nsmap)[0], last_b) + def test_findall_wildcard(self): root = ET.XML(''' From 1c9f6860049d608b42f1918325d91e7ca9a8433d Mon Sep 17 00:00:00 2001 From: Daniel Hillier Date: Wed, 23 Apr 2025 11:25:00 +1000 Subject: [PATCH 2/3] b132637: Fix positional xpath predicates when default namespace provided --- Lib/xml/etree/ElementPath.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index dc6bd28c03137d..05e2085916dcf4 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -61,6 +61,7 @@ xpath_tokenizer_re = re.compile( r"(" r"'[^']*'|\"[^\"]*\"|" + r"last\(\)|" # Pick out the only xpath function currently supported r"::|" r"//?|" r"\.\.|" @@ -71,6 +72,9 @@ r"\s+" ) +# Find integers, possibly preceded by - or + +int_re = re.compile(r"[\+\-]?\d+$") + def xpath_tokenizer(pattern, namespaces=None): default_namespace = namespaces.get('') if namespaces else None parsing_attribute = False @@ -85,11 +89,22 @@ def xpath_tokenizer(pattern, namespaces=None): yield ttype, "{%s}%s" % (namespaces[prefix], uri) except KeyError: raise SyntaxError("prefix %r not found in prefix map" % prefix) from None - elif default_namespace and not parsing_attribute: + # We don't prepend the default_namespace when: + # - the tag is an attribute as the XML spec says default namespaces + # don't apply to attributes + # - the tag is a number, possibly preceded by - or +, as these + # are not valid characters to start a tag with and are probably + # used as positional predicates. 
+ elif default_namespace and not (parsing_attribute or int_re.match(tag)): yield ttype, "{%s}%s" % (default_namespace, tag) else: yield token parsing_attribute = False + elif ttype == 'last()': + # Split the matched 'last()' into the two tokens expected from this + # generator: the tag 'last' followed by the '()' token type + yield ('', 'last') + yield ('()', '') else: yield token parsing_attribute = ttype == '@' @@ -266,7 +281,7 @@ def select_negated(context, result): if (attr_value := elem.get(key)) is not None and attr_value != value: yield elem return select_negated if '!=' in signature else select - if signature == "-" and not re.match(r"\-?\d+$", predicate[0]): + if signature == "-" and not int_re.match(predicate[0]): # [tag] tag = predicate[0] def select(context, result): @@ -276,7 +291,7 @@ def select(context, result): return select if signature == ".='" or signature == ".!='" or ( (signature == "-='" or signature == "-!='") - and not re.match(r"\-?\d+$", predicate[0])): + and not int_re.match(predicate[0])): # [.='value'] or [tag='value'] or [.!='value'] or [tag!='value'] tag = predicate[0] value = predicate[-1] From 3c2aacc4d69e8868988359d2fb7e182754176a07 Mon Sep 17 00:00:00 2001 From: Daniel Hillier Date: Wed, 23 Apr 2025 11:57:14 +1000 Subject: [PATCH 3/3] Add news entry --- .../Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst diff --git a/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst b/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst new file mode 100644 index 00000000000000..8ab2a9df071b7a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst @@ -0,0 +1,3 @@ +Fix ``xml.etree.ElementPath.xpath_tokenizer`` to correctly handle +positional predicates when a default namespace is provided in the +``namespaces`` argument.