From db39f734d60de10ad3e567b1b274a28da41315eb Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 25 Feb 2024 16:08:45 +0100 Subject: [PATCH 1/3] Add LIBXML_RECOVER Setting the recovery option by using a hardcoded value (1) worked already for SimpleXML. For DOM, a small change is necessary because otherwise the recover field overwrites the recovery option. From a quick search on GitHub [1] it looks like this won't clash with existing PHP code as no one seems to define (or use) a constant with such a name. [1] https://github.com/search?q=LIBXML_RECOVER+language%3APHP&type=code&l=PHP --- NEWS | 3 +++ UPGRADING | 3 +++ ext/dom/document.c | 2 +- .../modern/xml/XMLDocument_fromString_03.phpt | 5 ++-- ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 23 +++++++++++++++++++ ext/dom/xml_document.c | 4 +++- ext/libxml/libxml.stub.php | 5 ++++ ext/libxml/libxml_arginfo.h | 3 ++- .../tests/xml_parsing_LIBXML_RECOVER.phpt | 21 +++++++++++++++++ 9 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt create mode 100644 ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt diff --git a/NEWS b/NEWS index ca9858374f3ab..228a0f7b20a9d 100644 --- a/NEWS +++ b/NEWS @@ -70,6 +70,9 @@ PHP NEWS . Added LDAP_OPT_X_TLS_PROTOCOL_MAX/LDAP_OPT_X_TLS_PROTOCOL_TLS1_3 constants. (StephenWall) +- LibXML: + . Added LIBXML_RECOVER constant. (nielsdos) + - MBString: . Added mb_trim, mb_ltrim and mb_rtrim. (Yuya Hamada) diff --git a/UPGRADING b/UPGRADING index 9c0909963aeb8..c757eda334e50 100644 --- a/UPGRADING +++ b/UPGRADING @@ -482,6 +482,9 @@ PHP 8.4 UPGRADE NOTES . LDAP_OPT_X_TLS_PROTOCOL_MAX. . LDAP_OPT_X_TLS_PROTOCOL_TLS1_3. +- LibXML: + . LIBXML_RECOVER. + - OpenSSL: . X509_PURPOSE_OCSP_HELPER. . X509_PURPOSE_TIMESTAMP_SIGN. diff --git a/ext/dom/document.c b/ext/dom/document.c index 6c318bbb3dd2e..6268f9b687594 100644 --- a/ext/dom/document.c +++ b/ext/dom/document.c @@ -1261,7 +1261,7 @@ xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, resolve_externals = doc_props->resolveexternals; keep_blanks = doc_props->preservewhitespace; substitute_ent = doc_props->substituteentities; - recover = doc_props->recover; + recover = doc_props->recover || (options & XML_PARSE_RECOVER) == XML_PARSE_RECOVER; xmlInitParser(); diff --git a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt index 359f7086efcea..2c2bb2ba33a3e 100644 --- a/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt +++ b/ext/dom/tests/modern/xml/XMLDocument_fromString_03.phpt @@ -6,7 +6,7 @@ dom --EXPECT-- -DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)bool(true) +DOM\XMLDocument::createFromString(): Argument #2 ($options) contains invalid flags (allowed flags: LIBXML_RECOVER, LIBXML_NOENT, LIBXML_DTDLOAD, LIBXML_DTDATTR, LIBXML_DTDVALID, LIBXML_NOERROR, LIBXML_NOWARNING, LIBXML_NOBLANKS, LIBXML_XINCLUDE, LIBXML_NSCLEAN, LIBXML_NOCDATA, LIBXML_NONET, LIBXML_PEDANTIC, LIBXML_COMPACT, LIBXML_PARSEHUGE, LIBXML_BIGLINES)bool(true) +bool(true) bool(true) bool(true) bool(true) diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt new file mode 100644 index 0000000000000..914b674d96949 --- /dev/null +++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt @@ -0,0 +1,23 @@ +--TEST-- +XML parsing with LIBXML_RECOVER +--EXTENSIONS-- +dom +--FILE-- +loadXML('foo', options: LIBXML_RECOVER); +echo $dom->saveXML(); + +$dom = DOM\XMLDocument::createFromString('foo', options: LIBXML_RECOVER); +echo $dom->saveXML(), "\n"; + +?> +--EXPECTF-- +Warning: DOMDocument::loadXML(): expected '>' in Entity, line: 1 in %s on line %d + +foo + +Warning: DOM\XMLDocument::createFromString(): expected '>' in Entity, line: 1 in %s on line %d + +foo diff --git a/ext/dom/xml_document.c b/ext/dom/xml_document.c index c45e2ccfda014..3e50f3caa19a5 100644 --- a/ext/dom/xml_document.c +++ b/ext/dom/xml_document.c @@ -25,7 +25,8 @@ static bool check_options_validity(uint32_t arg_num, zend_long options) { - const zend_long VALID_OPTIONS = XML_PARSE_NOENT + const zend_long VALID_OPTIONS = XML_PARSE_RECOVER + | XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR | XML_PARSE_DTDVALID @@ -42,6 +43,7 @@ static bool check_options_validity(uint32_t arg_num, zend_long options) | XML_PARSE_BIG_LINES; if ((options & ~VALID_OPTIONS) != 0) { zend_argument_value_error(2, "contains invalid flags (allowed flags: " + "LIBXML_RECOVER, " "LIBXML_NOENT, " "LIBXML_DTDLOAD, " "LIBXML_DTDATTR, " diff --git a/ext/libxml/libxml.stub.php b/ext/libxml/libxml.stub.php index b60a6272503fe..ac9220c4f1d0e 100644 --- a/ext/libxml/libxml.stub.php +++ b/ext/libxml/libxml.stub.php @@ -18,6 +18,11 @@ */ const LIBXML_LOADED_VERSION = UNKNOWN; +/** + * @var int + * @cvalue XML_PARSE_RECOVER + */ +const LIBXML_RECOVER = UNKNOWN; /** * @var int * @cvalue XML_PARSE_NOENT diff --git a/ext/libxml/libxml_arginfo.h b/ext/libxml/libxml_arginfo.h index 5373b5ea003c0..17128faa04699 100644 --- a/ext/libxml/libxml_arginfo.h +++ b/ext/libxml/libxml_arginfo.h @@ -1,5 +1,5 @@ /* This is a generated file, edit the .stub.php file instead. - * Stub hash: 98cdd082ed214f84fd741097dd58979accc37bff */ + * Stub hash: 08e4e3f10ba89430292831f50c4760a362593282 */ ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_libxml_set_streams_context, 0, 1, IS_VOID, 0) ZEND_ARG_INFO(0, context) @@ -59,6 +59,7 @@ static void register_libxml_symbols(int module_number) REGISTER_LONG_CONSTANT("LIBXML_VERSION", LIBXML_VERSION, CONST_PERSISTENT); REGISTER_STRING_CONSTANT("LIBXML_DOTTED_VERSION", LIBXML_DOTTED_VERSION, CONST_PERSISTENT); REGISTER_STRING_CONSTANT("LIBXML_LOADED_VERSION", PHP_LIBXML_LOADED_VERSION, CONST_PERSISTENT); + REGISTER_LONG_CONSTANT("LIBXML_RECOVER", XML_PARSE_RECOVER, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("LIBXML_NOENT", XML_PARSE_NOENT, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("LIBXML_DTDLOAD", XML_PARSE_DTDLOAD, CONST_PERSISTENT); REGISTER_LONG_CONSTANT("LIBXML_DTDATTR", XML_PARSE_DTDATTR, CONST_PERSISTENT); diff --git a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt new file mode 100644 index 0000000000000..d44e241f1e742 --- /dev/null +++ b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt @@ -0,0 +1,21 @@ +--TEST-- +XML parsing with LIBXML_RECOVER +--EXTENSIONS-- +simplexml +--FILE-- +', options: LIBXML_RECOVER)); + +?> +--EXPECTF-- +Warning: simplexml_load_string(): Entity: line 1: parser error : Premature end of data in tag root line 1 in %s on line %d + +Warning: simplexml_load_string(): in %s on line %d + +Warning: simplexml_load_string(): ^ in %s on line %d +object(SimpleXMLElement)#1 (1) { + ["child"]=> + object(SimpleXMLElement)#2 (0) { + } +} From d38ba97953ba4ef0856973e9adce98591c635727 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 25 Feb 2024 16:35:11 +0100 Subject: [PATCH 2/3] Attempt to find an xml input that gives the same warning for all supported libxml versions --- ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt index 914b674d96949..f1410719979a1 100644 --- a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt +++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt @@ -6,18 +6,18 @@ dom loadXML('foo', options: LIBXML_RECOVER); +$dom->loadXML('', options: LIBXML_RECOVER); echo $dom->saveXML(); -$dom = DOM\XMLDocument::createFromString('foo', options: LIBXML_RECOVER); +$dom = DOM\XMLDocument::createFromString('', options: LIBXML_RECOVER); echo $dom->saveXML(), "\n"; ?> --EXPECTF-- -Warning: DOMDocument::loadXML(): expected '>' in Entity, line: 1 in %s on line %d +Warning: DOMDocument::loadXML(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d -foo + -Warning: DOM\XMLDocument::createFromString(): expected '>' in Entity, line: 1 in %s on line %d +Warning: DOM\XMLDocument::createFromString(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d -foo + From a2077b86abf0922ef077cee65fc063affe60df2d Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Sun, 25 Feb 2024 20:27:11 +0100 Subject: [PATCH 3/3] Try that again --- ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt | 4 ++-- ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt index f1410719979a1..89e693d47565d 100644 --- a/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt +++ b/ext/dom/tests/xml_parsing_LIBXML_RECOVER.phpt @@ -14,10 +14,10 @@ echo $dom->saveXML(), "\n"; ?> --EXPECTF-- -Warning: DOMDocument::loadXML(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d +Warning: DOMDocument::loadXML(): %s -Warning: DOM\XMLDocument::createFromString(): Premature end of data in tag root line 1 in Entity, line: 1 in %s on line %d +Warning: DOM\XMLDocument::createFromString(): %s diff --git a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt index d44e241f1e742..cfeb8e0c7413f 100644 --- a/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt +++ b/ext/simplexml/tests/xml_parsing_LIBXML_RECOVER.phpt @@ -9,7 +9,7 @@ var_dump(simplexml_load_string('', options: LIBXML_RECOVER)); ?> --EXPECTF-- -Warning: simplexml_load_string(): Entity: line 1: parser error : Premature end of data in tag root line 1 in %s on line %d +Warning: simplexml_load_string(): %s Warning: simplexml_load_string(): in %s on line %d