2023-11-23 06:14:28 +08:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace BookStack\Entities\Tools;
|
|
|
|
|
|
|
|
use BookStack\Util\HtmlDocument;
|
|
|
|
use Closure;
|
2023-11-23 22:29:07 +08:00
|
|
|
use DOMNode;
|
|
|
|
use DOMText;
|
2023-11-23 06:14:28 +08:00
|
|
|
|
|
|
|
/**
 * Parses page HTML for include tags (text of the form "{{@<digit>...}}")
 * and replaces inline includes with the content they reference.
 *
 * Work in progress: only includes reported as inline by PageIncludeContent
 * are currently replaced. Other tags are isolated to their own text node
 * but otherwise left in the document as-is (see the notes in parse()).
 */
class PageIncludeParser
{
    /**
     * Pattern matching a single include tag.
     * Capture group 1 holds the inner tag content, which must start with a digit.
     */
    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";

    /**
     * @param string  $pageHtml         HTML of the page to search for include tags.
     * @param Closure $pageContentForId Callback which receives the value of
     *                                  PageIncludeTag::getPageId() and returns the
     *                                  HTML passed on to PageIncludeContent.
     */
    public function __construct(
        protected string $pageHtml,
        protected Closure $pageContentForId,
    ) {
    }

    /**
     * Parse the page HTML, swapping inline include tags for their content.
     *
     * @return string The inner HTML of the resulting document body.
     */
    public function parse(): string
    {
        $doc = new HtmlDocument($this->pageHtml);
        $tags = $this->locateAndIsolateIncludeTags($doc);

        foreach ($tags as $tag) {
            $htmlContent = $this->pageContentForId->call($this, $tag->getPageId());
            $content = new PageIncludeContent($htmlContent, $tag);

            if ($content->isInline()) {
                $tagNode = $tag->domNode;
                $parent = $tagNode->parentNode;

                // Place the included nodes directly ahead of the tag text node,
                // then drop the tag node itself so only the content remains.
                foreach ($doc->adoptNodes($content->toDomNodes()) as $adoptedContentNode) {
                    $parent->insertBefore($adoptedContentNode, $tagNode);
                }
                $parent->removeChild($tagNode);

                continue;
            }

            // TODO - Non-inline
        }

        // TODO:
        // Hunt down the specific text nodes with matches
        // Split out tag text node from rest of content
        // Fetch tag content->
        // If range or top-block: delete tag text node, [Promote to top-block], delete old top-block if empty
        // If inline: Replace current text node with new text or elem
        // !! "Range" or "inline" status should come from tag parser and content fetcher, not guessed direct from content
        // since we could have a range of inline elements

        // [Promote to top-block]
        // Tricky operation.
        // Can throw in before or after current top-block depending on relative position
        // Could [Split] top-block but complex past a single level depth.
        // Maybe [Split] if one level depth, otherwise default to before/after block
        // Should work for the vast majority of cases, and not for those which would
        // technically be invalid in-editor anyway.

        // [Split]
        // Copy original top-block node type and attrs (apart from ID)
        // Move nodes after promoted tag-node into copy
        // Insert copy after original (after promoted top-block eventually)

        // Notes: May want to eventually parse through backwards, which should avoid issues
        // in changes affecting the next tag, where tags may be in the same/adjacent nodes.

        return $doc->getBodyInnerHtml();
    }

    /**
     * Locate include tags within the given document, isolating them to their
     * own nodes in the DOM for future targeted manipulation.
     *
     * @return PageIncludeTag[]
     */
    protected function locateAndIsolateIncludeTags(HtmlDocument $doc): array
    {
        // The predicate is applied to each text() child individually.
        // "contains(text(), ...)" would only test the first text child of an
        // element (XPath 1.0 node-set to string conversion), missing tags that
        // follow an inline child element such as "<p>a <b>b</b> {{@1}}</p>".
        $includeHosts = $doc->queryXPath("//body//*[text()[contains(., '{{@')]]");
        $includeTags = [];

        /** @var DOMNode $node */
        foreach ($includeHosts as $node) {
            // Snapshot the children first: splitting inserts new sibling text
            // nodes, and walking a fixed array keeps this traversal independent
            // of those insertions.
            /** @var DOMNode $childNode */
            foreach (iterator_to_array($node->childNodes) as $childNode) {
                if ($childNode->nodeName === '#text') {
                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
                }
            }
        }

        return $includeTags;
    }

    /**
     * Takes a text DOMNode and splits its text content at include tags
     * into multiple text nodes within the original parent.
     * Text ahead of each tag, and each tag itself, become new text nodes
     * inserted before the original node, which is left holding any trailing text.
     * Returns found PageIncludeTag references.
     *
     * @return PageIncludeTag[]
     */
    protected function splitTextNodesAtTags(DOMNode $textNode): array
    {
        $text = $textNode->textContent;
        $matchCount = preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);

        // Nothing to split (no tags found, or the pattern failed to run).
        if (!$matchCount) {
            return [];
        }

        $includeTags = [];
        $parent = $textNode->parentNode;

        // Offsets from PREG_OFFSET_CAPTURE are byte offsets, so the byte-based
        // substr()/strlen() are intentionally used here instead of mb_* variants.
        $currentOffset = 0;
        foreach ($matches[0] as $index => [$tagOuterContent, $tagStartOffset]) {
            $tagInnerContent = $matches[1][$index][0];

            // Preserve any plain text sitting between the previous tag and this one.
            if ($currentOffset < $tagStartOffset) {
                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
                $parent->insertBefore(new DOMText($previousText), $textNode);
            }

            $tagNode = $parent->insertBefore(new DOMText($tagOuterContent), $textNode);
            $includeTags[] = new PageIncludeTag($tagInnerContent, $tagNode);
            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
        }

        // The original node keeps only the text following the final tag
        // (which may be an empty string when the tag ends the text).
        if ($currentOffset > 0) {
            $textNode->textContent = substr($text, $currentOffset);
        }

        return $includeTags;
    }
}
|