Skip to content

Commit cffeadf

Browse files
committed
DOMXPath::quote(string $str): string
Adds a method to quote strings for use in XPath expressions, similar to PDO::quote() / mysqli::real_escape_string. Sample usage: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]"). The algorithm is derived from Robert Rossney's research into XPath quoting, published at https://stackoverflow.com/a/1352556/1067003 (but using an improved implementation I wrote myself, originally for chrome-php/chrome#575).
1 parent 7ed26c0 commit cffeadf

File tree

4 files changed

+185
-1
lines changed

4 files changed

+185
-1
lines changed

ext/dom/php_dom.stub.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,8 @@ public function registerNamespace(string $prefix, string $namespace): bool {}
934934
public function registerPhpFunctions(string|array|null $restrict = null): void {}
935935

936936
public function registerPhpFunctionNS(string $namespaceURI, string $name, callable $callable): void {}
937+
938+
/**
 * Quotes a string so it can be embedded as a string value in an XPath expression.
 *
 * The result is the input wrapped in single quotes when it contains no single
 * quote, wrapped in double quotes when it contains no double quote, and a
 * concat(...) call of alternately quoted pieces when it contains both.
 */
public static function quote(string $str): string {}
937939
}
938940
#endif
939941

ext/dom/php_dom_arginfo.h

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/dom/tests/DOMXPath_quote.phpt

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
--TEST--
2+
Test DOMXPath::quote with various inputs
3+
--SKIPIF--
4+
<?php if (!class_exists('DOMXPath')) die('skip DOMXPath not available.'); ?>
5+
--FILE--
6+
<?php
7+
// DOMXPath is constructed around a document; an empty one is all this test needs.
$dom = new DOMDocument();
$xpath = new DOMXPath($dom);

// Userland method to quote strings in XPath, similar to PDO::quote().
11+
12+
/**
 * Quote a string for use in an XPath expression.
 *
 * XPath 1.0 string literals have no escape sequences, so a value holding both
 * quote characters cannot be written as one literal; it is emitted as a
 * concat() call of alternately quoted pieces instead.
 *
 * Example: $xp->query("//span[contains(text()," . $xp->quote($string) . ")]")
 *
 * @param string $string raw value to quote.
 * @return string a single- or double-quoted literal, or a concat(...) expression.
 */
function UserlandDOMXPathQuote(string $string): string
{
    // No apostrophe: a single-quoted literal can hold the value verbatim.
    if (!\str_contains($string, '\'')) {
        return '\'' . $string . '\'';
    }
    // Apostrophes but no double quotes: a double-quoted literal works.
    if (!\str_contains($string, '"')) {
        return '"' . $string . '"';
    }

    // Both quote characters occur. Repeatedly take the longest prefix that
    // one kind of literal can hold and wrap it in the other quote character,
    // e.g.:
    //    'foo"bar => concat("'foo",'"bar')
    $pieces = [];
    $remaining = $string;
    do {
        $beforeApostrophe = \strcspn($remaining, '\'');
        $beforeDoubleQuote = \strcspn($remaining, '"');
        if ($beforeApostrophe > $beforeDoubleQuote) {
            // The apostrophe is further away: this piece is apostrophe-free.
            $delimiter = "'";
            $take = $beforeApostrophe;
        } else {
            $delimiter = '"';
            $take = $beforeDoubleQuote;
        }
        // $take is always >= 1 here: the first byte cannot be both quote
        // characters at once, so the loop is guaranteed to make progress.
        $pieces[] = $delimiter . \substr($remaining, 0, $take) . $delimiter;
        $remaining = \substr($remaining, $take);
    } while ($remaining !== '');

    return 'concat(' . \implode(',', $pieces) . ')';
}
44+
45+
46+
47+
// Map of raw input => exact XPath expression DOMXPath::quote() must return.
$expectations = [
    'foo' => "'foo'", // no quotes
    '"foo' => '\'"foo\'', // double quotes only
    '\'foo' => '"\'foo"', // single quotes only
    '\'foo"bar' => 'concat("\'foo",\'"bar\')', // both; double quotes in mid-string
    '\'foo"bar"baz' => 'concat("\'foo",\'"bar"baz\')', // multiple double quotes in mid-string
    '\'foo"' => 'concat("\'foo",\'"\')', // string ends with double quotes
    '\'foo""' => 'concat("\'foo",\'""\')', // string ends with run of double quotes
    '"\'foo' => 'concat(\'"\',"\'foo")', // string begins with double quotes
    '""\'foo' => 'concat(\'""\',"\'foo")', // string begins with run of double quotes
    '\'foo""bar' => 'concat("\'foo",\'""bar\')', // run of double quotes in mid-string
];

foreach ($expectations as $raw => $wanted) {
    $actual = $xpath->quote($raw);
    if ($actual !== $wanted) {
        // On a mismatch, also dump what the userland reference produces so
        // the native and userland results can be compared directly.
        echo 'Fail: ';
        var_dump([
            'input' => $raw,
            'expected' => $wanted,
            'result' => $actual,
            'userland_implementation_result' => UserlandDOMXPathQuote($raw),
        ]);
        continue;
    }
    echo "Pass: {$raw} => {$actual}\n";
}
74+
?>
75+
--EXPECT--
76+
Pass: foo => 'foo'
77+
Pass: "foo => '"foo'
78+
Pass: 'foo => "'foo"
79+
Pass: 'foo"bar => concat("'foo",'"bar')
80+
Pass: 'foo"bar"baz => concat("'foo",'"bar"baz')
81+
Pass: 'foo" => concat("'foo",'"')
82+
Pass: 'foo"" => concat("'foo",'""')
83+
Pass: "'foo => concat('"',"'foo")
84+
Pass: ""'foo => concat('""',"'foo")
85+
Pass: 'foo""bar => concat("'foo",'""bar')

ext/dom/xpath.c

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,93 @@ PHP_METHOD(DOMXPath, registerPhpFunctionNS)
446446
);
447447
}
448448

449+
/* {{{ */
450+
PHP_METHOD(DOMXPath, quote) {
451+
char *input;
452+
size_t input_len;
453+
char *output;
454+
size_t output_len = 0;
455+
456+
if (zend_parse_parameters(ZEND_NUM_ARGS(), "s", &input, &input_len) ==
457+
FAILURE) {
458+
RETURN_THROWS();
459+
}
460+
if (memchr(input, '\'', input_len) == NULL) {
461+
output_len = input_len + 2;
462+
output = emalloc(output_len);
463+
output[0] = '\'';
464+
memcpy(output + 1, input, input_len);
465+
output[output_len - 1] = '\'';
466+
} else if (memchr(input, '"', input_len) == NULL) {
467+
output_len = input_len + 2;
468+
output = emalloc(output_len);
469+
output[0] = '"';
470+
memcpy(output + 1, input, input_len);
471+
output[output_len - 1] = '"';
472+
} else {
473+
// need to do the concat() trick
474+
// first lets calculate the length (probably faster than repeated reallocs)
475+
output_len = strlen("concat(");
476+
size_t i;
477+
for (size_t i = 0; i < input_len; ++i) {
478+
uintptr_t bytesUntilSingleQuote =
479+
(uintptr_t)memchr(input + i, '\'', input_len - i);
480+
if (bytesUntilSingleQuote == 0) {
481+
bytesUntilSingleQuote = input_len - i;
482+
} else {
483+
bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i);
484+
}
485+
uintptr_t bytesUntilDoubleQuote =
486+
(uintptr_t)memchr(input + i, '"', input_len - i);
487+
if (bytesUntilDoubleQuote == 0) {
488+
bytesUntilDoubleQuote = input_len - i;
489+
} else {
490+
bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i);
491+
}
492+
const size_t bytesUntilQuote =
493+
(bytesUntilSingleQuote > bytesUntilDoubleQuote)
494+
? bytesUntilSingleQuote
495+
: bytesUntilDoubleQuote;
496+
i += bytesUntilQuote - 1;
497+
output_len += 1 + bytesUntilQuote + 1 + 1; // "bytesUntilQuote"[,)]
498+
}
499+
output = emalloc(output_len);
500+
size_t outputPos = strlen("concat(");
501+
memcpy(output, "concat(", outputPos);
502+
for (size_t i = 0; i < input_len; ++i) {
503+
uintptr_t bytesUntilSingleQuote =
504+
(uintptr_t)memchr(input + i, '\'', input_len - i);
505+
if (bytesUntilSingleQuote == 0) {
506+
bytesUntilSingleQuote = input_len - i;
507+
} else {
508+
bytesUntilSingleQuote = bytesUntilSingleQuote - (uintptr_t)(input + i);
509+
}
510+
uintptr_t bytesUntilDoubleQuote =
511+
(uintptr_t)memchr(input + i, '"', input_len - i);
512+
if (bytesUntilDoubleQuote == 0) {
513+
bytesUntilDoubleQuote = input_len - i;
514+
} else {
515+
bytesUntilDoubleQuote = bytesUntilDoubleQuote - (uintptr_t)(input + i);
516+
}
517+
const size_t bytesUntilQuote =
518+
(bytesUntilSingleQuote > bytesUntilDoubleQuote)
519+
? bytesUntilSingleQuote
520+
: bytesUntilDoubleQuote;
521+
const char quoteMethod =
522+
(bytesUntilSingleQuote > bytesUntilDoubleQuote) ? '\'' : '"';
523+
output[outputPos++] = quoteMethod;
524+
memcpy(output + outputPos, input + i, bytesUntilQuote);
525+
outputPos += bytesUntilQuote;
526+
output[outputPos++] = quoteMethod;
527+
i += bytesUntilQuote - 1;
528+
output[outputPos++] = ',';
529+
}
530+
output[outputPos - 1] = ')';
531+
}
532+
RETVAL_STRINGL(output, output_len);
533+
}
534+
/* }}} */
535+
449536
#endif /* LIBXML_XPATH_ENABLED */
450537

451538
#endif

0 commit comments

Comments
 (0)