diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 5fe9d6884106ad..404a0355096c8e 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -3168,6 +3168,47 @@ def test_find_xpath(self):
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()-0]')
self.assertRaisesRegex(SyntaxError, 'XPath', e.find, './tag[last()+1]')
+ def test_find_xpath_namespaces(self):
+ LINEAR_XML = '''
+
+
+
+
+
+ '''
+ e = ET.XML(LINEAR_XML)
+ nsmap = {"": "X"}
+
+ # Test for numeric indexing and last()
+ self.assertEqual(
+ e.find('./tag[1]', namespaces=nsmap).attrib['class'], 'a',
+ )
+ self.assertEqual(
+ e.find('./tag[2]', namespaces=nsmap).attrib['class'], 'b',
+ )
+ self.assertEqual(
+ e.find('./tag[last()]', namespaces=nsmap).attrib['class'], 'd',
+ )
+ self.assertEqual(
+ e.find('./tag[last()-1]', namespaces=nsmap).attrib['class'], 'c',
+ )
+ self.assertEqual(
+ e.find('./tag[last()-2]', namespaces=nsmap).attrib['class'], 'b',
+ )
+
+ self.assertRaisesRegex(
+ SyntaxError, 'XPath', e.find, './tag[0]', namespaces=nsmap,
+ )
+ self.assertRaisesRegex(
+ SyntaxError, 'XPath', e.find, './tag[-1]', namespaces=nsmap,
+ )
+ self.assertRaisesRegex(
+ SyntaxError, 'XPath', e.find, './tag[last()-0]', namespaces=nsmap,
+ )
+ self.assertRaisesRegex(
+ SyntaxError, 'XPath', e.find, './tag[last()+1]', namespaces=nsmap,
+ )
+
def test_findall(self):
e = ET.XML(SAMPLE_XML)
e[2] = ET.XML(SAMPLE_SECTION)
@@ -3307,6 +3348,21 @@ def test_findall_different_nsmaps(self):
self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2)
self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1)
+ def test_findall_default_nsmap_position_predicate(self):
+ root = ET.XML('''
+
+
+
+
+
+ ''')
+ nsmap = {'': 'default'}
+ first_b = root[1]
+ last_b = root[2]
+ self.assertEqual(len(root.findall(".//b[1]", namespaces=nsmap)), 2)
+ self.assertEqual(root.findall(".//b[1]", namespaces=nsmap)[0], first_b)
+ self.assertEqual(root.findall(".//b[last()]", namespaces=nsmap)[0], last_b)
+
def test_findall_wildcard(self):
root = ET.XML('''
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index dc6bd28c03137d..05e2085916dcf4 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -61,6 +61,7 @@
xpath_tokenizer_re = re.compile(
r"("
r"'[^']*'|\"[^\"]*\"|"
+ r"last\(\)|" # Pick out the only xpath function currently supported
r"::|"
r"//?|"
r"\.\.|"
@@ -71,6 +72,9 @@
r"\s+"
)
+# Match a token that is an integer, optionally preceded by - or +
+int_re = re.compile(r"[\+\-]?\d+$")
+
def xpath_tokenizer(pattern, namespaces=None):
default_namespace = namespaces.get('') if namespaces else None
parsing_attribute = False
@@ -85,11 +89,22 @@ def xpath_tokenizer(pattern, namespaces=None):
yield ttype, "{%s}%s" % (namespaces[prefix], uri)
except KeyError:
raise SyntaxError("prefix %r not found in prefix map" % prefix) from None
- elif default_namespace and not parsing_attribute:
+ # We don't prepend the default_namespace when:
+ # - the tag is an attribute, as the XML spec says default namespaces
+ # don't apply to attributes
+ # - the tag is a number, possibly preceded by - or +, as these
+ # are not valid characters to start a tag with and are probably
+ # used as positional predicates.
+ elif default_namespace and not (parsing_attribute or int_re.match(tag)):
yield ttype, "{%s}%s" % (default_namespace, tag)
else:
yield token
parsing_attribute = False
+ elif ttype == 'last()':
+ # Split the matched 'last()' token into the two separate
+ # (ttype, tag) pairs expected from this generator
+ yield ('', 'last')
+ yield ('()', '')
else:
yield token
parsing_attribute = ttype == '@'
@@ -266,7 +281,7 @@ def select_negated(context, result):
if (attr_value := elem.get(key)) is not None and attr_value != value:
yield elem
return select_negated if '!=' in signature else select
- if signature == "-" and not re.match(r"\-?\d+$", predicate[0]):
+ if signature == "-" and not int_re.match(predicate[0]):
# [tag]
tag = predicate[0]
def select(context, result):
@@ -276,7 +291,7 @@ def select(context, result):
return select
if signature == ".='" or signature == ".!='" or (
(signature == "-='" or signature == "-!='")
- and not re.match(r"\-?\d+$", predicate[0])):
+ and not int_re.match(predicate[0])):
# [.='value'] or [tag='value'] or [.!='value'] or [tag!='value']
tag = predicate[0]
value = predicate[-1]
diff --git a/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst b/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst
new file mode 100644
index 00000000000000..8ab2a9df071b7a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-04-23-11-57-05.gh-issue-132637.i5a8nM.rst
@@ -0,0 +1,3 @@
+Fix ``xml.etree.ElementPath.xpath_tokenizer`` to correctly handle
+positional predicates when a default namespace is provided in the
+``namespaces`` argument.