From db39f734d60de10ad3e567b1b274a28da41315eb Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 25 Feb 2024 16:08:45 +0100
Subject: [PATCH 1/3] Add LIBXML_RECOVER
Setting the recovery option by using a hardcoded value (1) already worked
for SimpleXML. For DOM, a small change is necessary because otherwise the
document's recover field overwrites the recovery option passed in $options.
From a quick search on GitHub [1] it looks like this won't clash with
existing PHP code as no one seems to define (or use) a constant with
such a name.
[1] https://github.com/search?q=LIBXML_RECOVER+language%3APHP&type=code&l=PHP
---
NEWS | 3 +++
UPGRADING | 3 +++
ext/dom/document.c | 2 +-
.../modern/xml/XMLDocument_fromString_03.phpt | 5 ++--
ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 23 +++++++++++++++++++
ext/dom/xml_document.c | 4 +++-
ext/libxml/libxml.stub.php | 5 ++++
ext/libxml/libxml_arginfo.h | 3 ++-
.../tests/xml_parsing_LIBXML_RECOVER.phpt | 21 +++++++++++++++++
9 files changed, 64 insertions(+), 5 deletions(-)
create mode 100644 ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
create mode 100644 ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
diff --git a/NEWS b/NEWS
index ca9858374f3ab..228a0f7b20a9d 100644
--- a/NEWS
+++ b/NEWS
@@ -70,6 +70,9 @@ PHP NEWS
. Added LDAP_OPT_X_TLS_PROTOCOL_MAX/LDAP_OPT_X_TLS_PROTOCOL_TLS1_3
constants. (StephenWall)
+- LibXML:
+ . Added LIBXML_RECOVER constant. (nielsdos)
+
- MBString:
. Added mb_trim, mb_ltrim and mb_rtrim. (Yuya Hamada)
diff --git a/UPGRADING b/UPGRADING
index 9c0909963aeb8..c757eda334e50 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -482,6 +482,9 @@ PHP 8.4 UPGRADE NOTES
. LDAP_OPT_X_TLS_PROTOCOL_MAX.
. LDAP_OPT_X_TLS_PROTOCOL_TLS1_3.
+- LibXML:
+ . LIBXML_RECOVER.
+
- OpenSSL:
. X509_PURPOSE_OCSP_HELPER.
. X509_PURPOSE_TIMESTAMP_SIGN.
diff --git a/ext/dom/document.c b/ext/dom/document.c
index 6c318bbb3dd2e..6268f9b687594 100644
--- a/ext/dom/document.c
+++ b/ext/dom/document.c
@@ -1261,7 +1261,7 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source,
resolve_externals = doc_props->resolveexternals;
keep_blanks = doc_props->preservewhitespace;
substitute_ent = doc_props->substituteentities;
- recover = doc_props->recover;
+ recover = doc_props->recover || (options & XML_PARSE_RECOVER) == XML_PARSE_RECOVER;
xmlInitParser();
diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
index 359f7086efcea..2c2bb2ba33a3e 100644
--- a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
+++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt
@@ -6,7 +6,7 @@ dom
--EXPECT--
-DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)bool(true)
+DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_RECOVER, LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)bool(true)
+bool(true)
bool(true)
bool(true)
bool(true)
diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
new file mode 100644
index 0000000000000..914b674d96949
--- /dev/null
+++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
@@ -0,0 +1,23 @@
+--TEST--
+XML parsing with LIBXML_RECOVER
+--EXTENSIONS--
+dom
+--FILE--
+loadXML('foo', options: LIBXML_RECOVER);
+echo $dom->saveXML();
+
+$dom = DOM\XMLDocument::createFromString('foo', options: LIBXML_RECOVER);
+echo $dom->saveXML(), "\n";
+
+?>
+--EXPECTF--
+Warning: DOMDocument::loadXML(): expected '>' in Entity, line: 1 in %s on line %d
+
+foo
+
+Warning: DOM\XMLDocument::createFromString(): expected '>' in Entity, line: 1 in %s on line %d
+
+foo
diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c
index c45e2ccfda014..3e50f3caa19a5 100644
--- a/ext/dom/xml_document.c
+++ b/ext/dom/xml_document.c
@@ -25,7 +25,8 @@
static bool check_options_validity(uint32_t arg_num, zend_long options)
{
- const zend_long VALID_OPTIONS = XML_PARSE_NOENT
+ const zend_long VALID_OPTIONS = XML_PARSE_RECOVER
+ | XML_PARSE_NOENT
| XML_PARSE_DTDLOAD
| XML_PARSE_DTDATTR
| XML_PARSE_DTDVALID
@@ -42,6 +43,7 @@ static bool check_options_validity(uint32_t arg_num, zend_long options)
| XML_PARSE_BIG_LINES;
if ((options & ~VALID_OPTIONS) != 0) {
zend_argument_value_error(2, "contains invalid flags (allowed flags: "
+ "LIBXML_RECOVER, "
"LIBXML_NOENT, "
"LIBXML_DTDLOAD, "
"LIBXML_DTDATTR, "
diff --git a/ext/libxml/libxml.stub.php b/ext/libxml/libxml.stub.php
index b60a6272503fe..ac9220c4f1d0e 100644
--- a/ext/libxml/libxml.stub.php
+++ b/ext/libxml/libxml.stub.php
@@ -18,6 +18,11 @@
*/
const LIBXML_LOADED_VERSION = UNKNOWN;
+/**
+ * @var int
+ * @cvalue XML_PARSE_RECOVER
+ */
+const LIBXML_RECOVER = UNKNOWN;
/**
* @var int
* @cvalue XML_PARSE_NOENT
diff --git a/ext/libxml/libxml_arginfo.h b/ext/libxml/libxml_arginfo.h
index 5373b5ea003c0..17128faa04699 100644
--- a/ext/libxml/libxml_arginfo.h
+++ b/ext/libxml/libxml_arginfo.h
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
- * Stub hash: 98cdd082ed214f84fd741097dd58979accc37bff */
+ * Stub hash: 08e4e3f10ba89430292831f50c4760a362593282 */
ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_libxml_set_streams_context, 0, 1, IS_VOID, 0)
ZEND_ARG_INFO(0, context)
@@ -59,6 +59,7 @@ static void register_libxml_symbols(int module_number)
REGISTER_LONG_CONSTANT("LIBXML_VERSION", LIBXML_VERSION, CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("LIBXML_DOTTED_VERSION", LIBXML_DOTTED_VERSION, CONST_PERSISTENT);
REGISTER_STRING_CONSTANT("LIBXML_LOADED_VERSION", PHP_LIBXML_LOADED_VERSION, CONST_PERSISTENT);
+ REGISTER_LONG_CONSTANT("LIBXML_RECOVER", XML_PARSE_RECOVER, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("LIBXML_NOENT", XML_PARSE_NOENT, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("LIBXML_DTDLOAD", XML_PARSE_DTDLOAD, CONST_PERSISTENT);
REGISTER_LONG_CONSTANT("LIBXML_DTDATTR", XML_PARSE_DTDATTR, CONST_PERSISTENT);
diff --git a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
new file mode 100644
index 0000000000000..d44e241f1e742
--- /dev/null
+++ b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
@@ -0,0 +1,21 @@
+--TEST--
+XML parsing with LIBXML_RECOVER
+--EXTENSIONS--
+simplexml
+--FILE--
+<?php
+
+var_dump(simplexml_load_string('<root><child/>', options: LIBXML_RECOVER));
+
+?>
+--EXPECTF--
+Warning: simplexml_load_string(): Entity: line 1: parser error : Premature end of data in tag root line 1 in %s on line %d
+
+Warning: simplexml_load_string(): in %s on line %d
+
+Warning: simplexml_load_string(): ^ in %s on line %d
+object(SimpleXMLElement)#1 (1) {
+ ["child"]=>
+ object(SimpleXMLElement)#2 (0) {
+ }
+}
From d38ba97953ba4ef0856973e9adce98591c635727 Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 25 Feb 2024 16:35:11 +0100
Subject: [PATCH 2/3] Attempt to find an XML input that gives the same warning
for all supported libxml versions
---
ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
index 914b674d96949..f1410719979a1 100644
--- a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
+++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
@@ -6,18 +6,18 @@ dom
loadXML('foo', options: LIBXML_RECOVER);
+$dom->loadXML('', options: LIBXML_RECOVER);
echo $dom->saveXML();
-$dom = DOM\XMLDocument::createFromString('foo', options: LIBXML_RECOVER);
+$dom = DOM\XMLDocument::createFromString('', options: LIBXML_RECOVER);
echo $dom->saveXML(), "\n";
?>
--EXPECTF--
-Warning: DOMDocument::loadXML(): expected '>' in Entity, line: 1 in %s on line %d
+Warning: DOMDocument::loadXML(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d
-foo
+
-Warning: DOM\XMLDocument::createFromString(): expected '>' in Entity, line: 1 in %s on line %d
+Warning: DOM\XMLDocument::createFromString(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d
-foo
+
From a2077b86abf0922ef077cee65fc063affe60df2d Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Sun, 25 Feb 2024 20:27:11 +0100
Subject: [PATCH 3/3] Try that again: match the libxml warning text with %s
---
ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 4 ++--
ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
index f1410719979a1..89e693d47565d 100644
--- a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
+++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt
@@ -14,10 +14,10 @@ echo $dom->saveXML(), "\n";
?>
--EXPECTF--
-Warning: DOMDocument::loadXML(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d
+Warning: DOMDocument::loadXML(): %s
-Warning: DOM\XMLDocument::createFromString(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d
+Warning: DOM\XMLDocument::createFromString(): %s
diff --git a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
index d44e241f1e742..cfeb8e0c7413f 100644
--- a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
+++ b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt
@@ -9,7 +9,7 @@ var_dump(simplexml_load_string('', options: LIBXML_RECOVER));
?>
--EXPECTF--
-Warning: simplexml_load_string(): Entity: line 1: parser error : Premature end of data in tag root line 1 in %s on line %d
+Warning: simplexml_load_string(): %s
Warning: simplexml_load_string(): in %s on line %d