1919
2020class PageContent
2121{
22- protected Page $ page ;
23-
24- /**
25- * PageContent constructor.
26- */
27- public function __construct (Page $ page )
28- {
29- $ this ->page = $ page ;
22+ public function __construct (
23+ protected Page $ page
24+ ) {
3025 }
3126
3227 /**
3328 * Update the content of the page with new provided HTML.
3429 */
35- public function setNewHTML (string $ html )
30+ public function setNewHTML (string $ html ): void
3631 {
3732 $ html = $ this ->extractBase64ImagesFromHtml ($ html );
3833 $ this ->page ->html = $ this ->formatHtml ($ html );
@@ -43,7 +38,7 @@ public function setNewHTML(string $html)
4338 /**
4439 * Update the content of the page with new provided Markdown content.
4540 */
46- public function setNewMarkdown (string $ markdown )
41+ public function setNewMarkdown (string $ markdown ): void
4742 {
4843 $ markdown = $ this ->extractBase64ImagesFromMarkdown ($ markdown );
4944 $ this ->page ->markdown = $ markdown ;
@@ -57,7 +52,7 @@ public function setNewMarkdown(string $markdown)
5752 */
5853 protected function extractBase64ImagesFromHtml (string $ htmlText ): string
5954 {
60- if (empty ($ htmlText ) || strpos ($ htmlText , 'data:image ' ) === false ) {
55+ if (empty ($ htmlText ) || ! str_contains ($ htmlText , 'data:image ' )) {
6156 return $ htmlText ;
6257 }
6358
@@ -91,7 +86,7 @@ protected function extractBase64ImagesFromHtml(string $htmlText): string
9186 * Attempting to capture the whole data uri using regex can cause PHP
9287 * PCRE limits to be hit with larger, multi-MB, files.
9388 */
94- protected function extractBase64ImagesFromMarkdown (string $ markdown )
89+ protected function extractBase64ImagesFromMarkdown (string $ markdown ): string
9590 {
9691 $ matches = [];
9792 $ contentLength = strlen ($ markdown );
@@ -183,32 +178,13 @@ protected function formatHtml(string $htmlText): string
183178 $ childNodes = $ body ->childNodes ;
184179 $ xPath = new DOMXPath ($ doc );
185180
186- // Set ids on top-level nodes
181+ // Map to hold used ID references
187182 $ idMap = [];
188- foreach ($ childNodes as $ index => $ childNode ) {
189- [$ oldId , $ newId ] = $ this ->setUniqueId ($ childNode , $ idMap );
190- if ($ newId && $ newId !== $ oldId ) {
191- $ this ->updateLinks ($ xPath , '# ' . $ oldId , '# ' . $ newId );
192- }
193- }
183+ // Map to hold changing ID references
184+ $ changeMap = [];
194185
195- // Set ids on nested header nodes
196- $ nestedHeaders = $ xPath ->query ('//body//*//h1|//body//*//h2|//body//*//h3|//body//*//h4|//body//*//h5|//body//*//h6 ' );
197- foreach ($ nestedHeaders as $ nestedHeader ) {
198- [$ oldId , $ newId ] = $ this ->setUniqueId ($ nestedHeader , $ idMap );
199- if ($ newId && $ newId !== $ oldId ) {
200- $ this ->updateLinks ($ xPath , '# ' . $ oldId , '# ' . $ newId );
201- }
202- }
203-
204- // Ensure no duplicate ids within child items
205- $ idElems = $ xPath ->query ('//body//*//*[@id] ' );
206- foreach ($ idElems as $ domElem ) {
207- [$ oldId , $ newId ] = $ this ->setUniqueId ($ domElem , $ idMap );
208- if ($ newId && $ newId !== $ oldId ) {
209- $ this ->updateLinks ($ xPath , '# ' . $ oldId , '# ' . $ newId );
210- }
211- }
186+ $ this ->updateIdsRecursively ($ body , 0 , $ idMap , $ changeMap );
187+ $ this ->updateLinks ($ xPath , $ changeMap );
212188
213189 // Generate inner html as a string
214190 $ html = '' ;
@@ -223,20 +199,53 @@ protected function formatHtml(string $htmlText): string
223199 }
224200
225201 /**
226- * Update the all links to the $old location to instead point to $new.
202+ * For the given DOMNode, traverse its children recursively and update IDs
203+ * where required (Top-level, headers & elements with IDs).
204+ * Will update the provided $changeMap array with changes made, where keys are the old
205+ * ids and the corresponding values are the new ids.
206+ */
207+ protected function updateIdsRecursively (DOMNode $ element , int $ depth , array &$ idMap , array &$ changeMap ): void
208+ {
209+ /* @var DOMNode $child */
210+ foreach ($ element ->childNodes as $ child ) {
211+ if ($ child instanceof DOMElement && ($ depth === 0 || in_array ($ child ->nodeName , ['h1 ' , 'h2 ' , 'h3 ' , 'h4 ' , 'h5 ' , 'h6 ' ]) || $ child ->getAttribute ('id ' ))) {
212+ [$ oldId , $ newId ] = $ this ->setUniqueId ($ child , $ idMap );
213+ if ($ newId && $ newId !== $ oldId && !isset ($ idMap [$ oldId ])) {
214+ $ changeMap [$ oldId ] = $ newId ;
215+ }
216+ }
217+
218+ if ($ child ->hasChildNodes ()) {
219+ $ this ->updateIdsRecursively ($ child , $ depth + 1 , $ idMap , $ changeMap );
220+ }
221+ }
222+ }
223+
224+ /**
225+ * Update the all links in the given xpath to apply requires changes within the
226+ * given $changeMap array.
227227 */
228- protected function updateLinks (DOMXPath $ xpath , string $ old , string $ new )
228+ protected function updateLinks (DOMXPath $ xpath , array $ changeMap ): void
229229 {
230- $ old = str_replace ('" ' , '' , $ old );
231- $ matchingLinks = $ xpath ->query ('//body//*//*[@href=" ' . $ old . '"] ' );
232- foreach ($ matchingLinks as $ domElem ) {
233- $ domElem ->setAttribute ('href ' , $ new );
230+ if (empty ($ changeMap )) {
231+ return ;
232+ }
233+
234+ $ links = $ xpath ->query ('//body//*//*[@href] ' );
235+ /** @var DOMElement $domElem */
236+ foreach ($ links as $ domElem ) {
237+ $ href = ltrim ($ domElem ->getAttribute ('href ' ), '# ' );
238+ $ newHref = $ changeMap [$ href ] ?? null ;
239+ if ($ newHref ) {
240+ $ domElem ->setAttribute ('href ' , '# ' . $ newHref );
241+ }
234242 }
235243 }
236244
237245 /**
238246 * Set a unique id on the given DOMElement.
239- * A map for existing ID's should be passed in to check for current existence.
247+ * A map for existing ID's should be passed in to check for current existence,
248+ * and this will be updated with any new IDs set upon elements.
240249 * Returns a pair of strings in the format [old_id, new_id].
241250 */
242251 protected function setUniqueId (DOMNode $ element , array &$ idMap ): array
@@ -247,7 +256,7 @@ protected function setUniqueId(DOMNode $element, array &$idMap): array
247256
248257 // Stop if there's an existing valid id that has not already been used.
249258 $ existingId = $ element ->getAttribute ('id ' );
250- if (strpos ($ existingId , 'bkmrk ' ) === 0 && !isset ($ idMap [$ existingId ])) {
259+ if (str_starts_with ($ existingId , 'bkmrk ' ) && !isset ($ idMap [$ existingId ])) {
251260 $ idMap [$ existingId ] = true ;
252261
253262 return [$ existingId , $ existingId ];
@@ -258,7 +267,7 @@ protected function setUniqueId(DOMNode $element, array &$idMap): array
258267 // the same content is passed through.
259268 $ contentId = 'bkmrk- ' . mb_substr (strtolower (preg_replace ('/\s+/ ' , '- ' , trim ($ element ->nodeValue ))), 0 , 20 );
260269 $ newId = urlencode ($ contentId );
261- $ loopIndex = 0 ;
270+ $ loopIndex = 1 ;
262271
263272 while (isset ($ idMap [$ newId ])) {
264273 $ newId = urlencode ($ contentId . '- ' . $ loopIndex );
0 commit comments