pageHtml); $tags = $this->locateAndIsolateIncludeTags($doc); foreach ($tags as $tag) { $htmlContent = $this->pageContentForId->call($this, $tag->getPageId()); $content = new PageIncludeContent($htmlContent, $tag); if (!$content->isInline()) { $parentP = $this->getParentParagraph($tag->domNode); $isWithinParentP = $parentP === $tag->domNode->parentNode; if ($parentP && $isWithinParentP) { $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode); } else if ($parentP) { $this->moveTagNodeToBesideParent($tag, $parentP); } } $this->replaceNodeWithNodes($tag->domNode, $content->toDomNodes()); } $this->cleanup(); return $doc->getBodyInnerHtml(); } /** * Locate include tags within the given document, isolating them to their * own nodes in the DOM for future targeted manipulation. * @return PageIncludeTag[] */ protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array { $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]"); $includeTags = []; /** @var DOMNode $node */ /** @var DOMNode $childNode */ foreach ($includeHosts as $node) { foreach ($node->childNodes as $childNode) { if ($childNode->nodeName === '#text') { array_push($includeTags, ...$this->splitTextNodesAtTags($childNode)); } } } return $includeTags; } /** * Takes a text DOMNode and splits its text content at include tags * into multiple text nodes within the original parent. * Returns found PageIncludeTag references. * @return PageIncludeTag[] */ protected function splitTextNodesAtTags(DOMNode $textNode): array { $includeTags = []; $text = $textNode->textContent; preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE); $currentOffset = 0; foreach ($matches[0] as $index => $fullTagMatch) { $tagOuterContent = $fullTagMatch[0]; $tagInnerContent = $matches[1][$index][0]; $tagStartOffset = $fullTagMatch[1]; if ($currentOffset < $tagStartOffset) { $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset); $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode); } $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode); $includeTags[] = new PageIncludeTag($tagInnerContent, $node); $currentOffset = $tagStartOffset + strlen($tagOuterContent); } if ($currentOffset > 0) { $textNode->textContent = substr($text, $currentOffset); } return $includeTags; } /** * Replace the given node with all those in $replacements * @param DOMNode[] $replacements */ protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void { /** @var DOMDocument $targetDoc */ $targetDoc = $toReplace->ownerDocument; foreach ($replacements as $replacement) { if ($replacement->ownerDocument !== $targetDoc) { $replacement = $targetDoc->adoptNode($replacement); } $toReplace->parentNode->insertBefore($replacement, $toReplace); } $toReplace->parentNode->removeChild($toReplace); } /** * Move a tag node to become a sibling of the given parent. * Will attempt to guess a position based upon the tag content within the parent. */ protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void { $parentText = $parent->textContent; $tagPos = strpos($parentText, $tag->tagContent); $before = $tagPos < (strlen($parentText) / 2); if ($before) { $parent->parentNode->insertBefore($tag->domNode, $parent); } else { $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling); } } /** * Splits the given $parentNode at the location of the $domNode within it. * Attempts replicate the original $parentNode, moving some of their parent * children in where needed, before adding the $domNode between. */ protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void { $children = [...$parentNode->childNodes]; $splitPos = array_search($domNode, $children, true); if ($splitPos === false) { $splitPos = count($children) - 1; } $parentClone = $parentNode->cloneNode(); $parentNode->parentNode->insertBefore($parentClone, $parentNode); $parentClone->removeAttribute('id'); /** @var DOMNode $child */ for ($i = 0; $i < $splitPos; $i++) { $child = $children[$i]; $parentClone->appendChild($child); } $parentNode->parentNode->insertBefore($domNode, $parentNode); $this->toCleanup[] = $parentNode; $this->toCleanup[] = $parentClone; } /** * Get the parent paragraph of the given node, if existing. */ protected function getParentParagraph(DOMNode $parent): ?DOMNode { do { if (strtolower($parent->nodeName) === 'p') { return $parent; } $parent = $parent->parentElement; } while ($parent !== null); return null; } /** * Cleanup after a parse operation. * Removes stranded elements we may have left during the parse. */ protected function cleanup(): void { foreach ($this->toCleanup as $element) { $element->normalize(); if ($element->parentNode && !$element->hasChildNodes()) { $element->parentNode->removeChild($element); } } } }