61
61
xpath_tokenizer_re = re .compile (
62
62
r"("
63
63
r"'[^']*'|\"[^\"]*\"|"
64
+ r"last\(\)|" # Pick out the only xpath function currently supported
64
65
r"::|"
65
66
r"//?|"
66
67
r"\.\.|"
71
72
r"\s+"
72
73
)
73
74
75
+ # Find integers, possibly preceded by - or +
76
+ int_re = re .compile (r"[\+\-]?\d+$" )
77
+
74
78
def xpath_tokenizer (pattern , namespaces = None ):
75
79
default_namespace = namespaces .get ('' ) if namespaces else None
76
80
parsing_attribute = False
@@ -85,11 +89,22 @@ def xpath_tokenizer(pattern, namespaces=None):
85
89
yield ttype , "{%s}%s" % (namespaces [prefix ], uri )
86
90
except KeyError :
87
91
raise SyntaxError ("prefix %r not found in prefix map" % prefix ) from None
88
- elif default_namespace and not parsing_attribute :
92
+ # We don't prepend the default_namespace when:
93
+ # - the tag is an attribute as the xml spec says default namespaces
94
+ # don't apply to attributes
95
+ # - when the tag is a number, possibly preceded by - or +, as these
96
+ # are not valid characters to start a tag with and are probably
97
+ # used as positional predicates.
98
+ elif default_namespace and not (parsing_attribute or int_re .match (tag )):
89
99
yield ttype , "{%s}%s" % (default_namespace , tag )
90
100
else :
91
101
yield token
92
102
parsing_attribute = False
103
+ elif ttype == 'last()' :
104
+ # Break the found 'last()' part into the separate 'tag' and 'ttype'
105
+ # values expected from this generator
106
+ yield ('' , 'last' )
107
+ yield ('()' , '' )
93
108
else :
94
109
yield token
95
110
parsing_attribute = ttype == '@'
@@ -266,7 +281,7 @@ def select_negated(context, result):
266
281
if (attr_value := elem .get (key )) is not None and attr_value != value :
267
282
yield elem
268
283
return select_negated if '!=' in signature else select
269
- if signature == "-" and not re .match (r"\-?\d+$" , predicate [0 ]):
284
+ if signature == "-" and not int_re .match (predicate [0 ]):
270
285
# [tag]
271
286
tag = predicate [0 ]
272
287
def select (context , result ):
@@ -276,7 +291,7 @@ def select(context, result):
276
291
return select
277
292
if signature == ".='" or signature == ".!='" or (
278
293
(signature == "-='" or signature == "-!='" )
279
- and not re .match (r"\-?\d+$" , predicate [0 ])):
294
+ and not int_re .match (predicate [0 ])):
280
295
# [.='value'] or [tag='value'] or [.!='value'] or [tag!='value']
281
296
tag = predicate [0 ]
282
297
value = predicate [- 1 ]
0 commit comments