2021-11-13 06:57:50 +08:00
|
|
|
<?php
|
|
|
|
|
2022-08-16 18:27:22 +08:00
|
|
|
namespace BookStack\Search;
|
2021-11-13 06:57:50 +08:00
|
|
|
|
2023-05-18 00:56:55 +08:00
|
|
|
use BookStack\Activity\Models\Tag;
|
2021-11-13 06:57:50 +08:00
|
|
|
use BookStack\Entities\Models\Entity;
|
|
|
|
use Illuminate\Support\HtmlString;
|
|
|
|
|
|
|
|
/**
 * Decorates search result entities with highlighted preview attributes
 * ('preview_name', 'preview_content') and flags matching tags.
 */
class SearchResultsFormatter
{
    /**
     * For the given array of entities, prepare the models to be shown in search result
     * output. This sets a series of additional attributes on each entity.
     *
     * @param Entity[] $results
     */
    public function format(array $results, SearchOptions $options): void
    {
        foreach ($results as $result) {
            $this->setSearchPreview($result, $options);
        }
    }

    /**
     * Update the given entity model to set attributes used for previews of the item,
     * primarily within search result lists.
     *
     * Sets 'preview_name' (full name, no length limit) and 'preview_content'
     * (excerpt targeting roughly 260 characters) as HtmlString instances with
     * matched terms wrapped in <strong> tags, then flags any loaded tags
     * which contain a search term.
     */
    protected function setSearchPreview(Entity $entity, SearchOptions $options)
    {
        // The entity declares which of its own properties holds its searchable text.
        $textProperty = $entity->textField;
        $textContent = $entity->$textProperty;
        $terms = array_merge($options->exacts, $options->searches);

        $originalContentByNewAttribute = [
            'preview_name'    => $entity->name,
            'preview_content' => $textContent,
        ];

        foreach ($originalContentByNewAttribute as $attributeName => $content) {
            // A target length of 0 means "show everything"; names are never cut down.
            $targetLength = ($attributeName === 'preview_name') ? 0 : 260;
            $matchRefs = $this->getMatchPositions($content, $terms);
            $mergedRefs = $this->sortAndMergeMatchPositions($matchRefs);
            $formatted = $this->formatTextUsingMatchPositions($mergedRefs, $content, $targetLength);
            $entity->setAttribute($attributeName, new HtmlString($formatted));
        }

        // Only use tags when already loaded, so no extra query is triggered here.
        $tags = $entity->relationLoaded('tags') ? $entity->tags->all() : [];
        $this->highlightTagsContainingTerms($tags, $terms);
    }

    /**
     * Highlight tags which match the given terms.
     * Sets a 'highlight_name' and/or 'highlight_value' attribute to true on each
     * tag whose name/value contains any term (case-insensitive).
     * Empty terms are ignored since they would otherwise match every tag.
     *
     * @param Tag[]    $tags
     * @param string[] $terms
     */
    protected function highlightTagsContainingTerms(array $tags, array $terms): void
    {
        // Lower-case the terms once up-front rather than once per tag.
        $needles = $this->normaliseTerms($terms);
        if (count($needles) === 0) {
            return;
        }

        foreach ($tags as $tag) {
            $tagName = mb_strtolower($tag->name);
            $tagValue = mb_strtolower($tag->value);

            foreach ($needles as $needle) {
                if (mb_strpos($tagName, $needle) !== false) {
                    $tag->setAttribute('highlight_name', true);
                }

                if (mb_strpos($tagValue, $needle) !== false) {
                    $tag->setAttribute('highlight_value', true);
                }
            }
        }
    }

    /**
     * Get positions of the given terms within the given text.
     * Is in the array format of [int $startIndex => int $endIndex] where the indexes
     * are character positions within the provided text (end index is exclusive).
     * Matching is case-insensitive. Empty terms are skipped, and where several
     * terms match at the same start index the longest match is kept.
     *
     * NOTE(review): positions are found in a lower-cased copy of the text; this assumes
     * lower-casing does not change the character count - confirm for unusual scripts.
     *
     * @param string[] $terms
     *
     * @return array<int, int>
     */
    protected function getMatchPositions(string $text, array $terms): array
    {
        $matchRefs = [];
        $text = mb_strtolower($text);

        foreach ($this->normaliseTerms($terms) as $term) {
            $termLength = mb_strlen($term);
            $pos = mb_strpos($text, $term);

            while ($pos !== false) {
                $end = $pos + $termLength;
                // Never let a shorter term shrink a longer match found at the same start.
                $matchRefs[$pos] = max($matchRefs[$pos] ?? 0, $end);
                // Continue searching after this match; $termLength > 0 guarantees progress.
                $pos = mb_strpos($text, $term, $end);
            }
        }

        return $matchRefs;
    }

    /**
     * Lower-case the given terms and drop any which are empty.
     * An empty needle matches at every offset in PHP 8+, which would otherwise
     * cause never-ending match loops and match-everything highlighting.
     *
     * @param string[] $terms
     *
     * @return string[]
     */
    private function normaliseTerms(array $terms): array
    {
        $normalised = [];

        foreach ($terms as $term) {
            $lowered = mb_strtolower((string) $term);
            if ($lowered !== '') {
                $normalised[] = $lowered;
            }
        }

        return $normalised;
    }

    /**
     * Sort the given match positions before merging them where they're
     * adjacent or where they overlap.
     *
     * @param array<int, int> $matchPositions
     *
     * @return array<int, int>
     */
    protected function sortAndMergeMatchPositions(array $matchPositions): array
    {
        ksort($matchPositions);
        $mergedRefs = [];
        $lastStart = 0;
        $lastEnd = 0;

        foreach ($matchPositions as $start => $end) {
            if ($start > $lastEnd) {
                // Clear gap since the previous range: begin a new range.
                $mergedRefs[$start] = $end;
                $lastStart = $start;
                $lastEnd = $end;
            } elseif ($end > $lastEnd) {
                // Touches or overlaps the previous range: extend that range.
                // (A first match at index 0 also lands here, creating range 0.)
                $mergedRefs[$lastStart] = $end;
                $lastEnd = $end;
            }
        }

        return $mergedRefs;
    }

    /**
     * Format the given original text, returning a version where terms are highlighted within.
     * Returned content is in HTML text format: source text is escaped and matches
     * are wrapped in <strong> tags.
     * A given $targetLength of 0 asserts no target length limit, in which case the
     * whole text is always returned with every match highlighted.
     *
     * This is a complex function but written to be relatively efficient, going through the term matches in order
     * so that we're only doing a one-time loop through of the matches. There is no further searching
     * done within here.
     *
     * @param array<int, int> $matchPositions Sorted, non-overlapping [start => end] character ranges.
     */
    protected function formatTextUsingMatchPositions(array $matchPositions, string $originalText, int $targetLength): string
    {
        $maxEnd = mb_strlen($originalText);
        $fetchAll = ($targetLength === 0);
        $contextLength = ($fetchAll ? 0 : 32);

        $firstStart = null;
        $lastEnd = 0;
        $content = '';
        // Raw (un-escaped) context following the latest match. Held back until the next
        // match is seen so that any overlap is trimmed in raw characters, never by
        // cutting through already-escaped HTML entities.
        $pendingTail = '';
        $contentTextLength = 0;

        if ($fetchAll) {
            $targetLength = $maxEnd * 2;
        }

        foreach ($matchPositions as $start => $end) {
            // Get our outer text ranges for the added context we want to show upon the result.
            $contextStart = max($start - $contextLength, 0, $lastEnd);
            $contextEnd = min($end + $contextLength, $maxEnd);

            // Adjust the start if we're going to be touching the previous match.
            $startDiff = $start - $lastEnd;
            if ($startDiff < 0) {
                $contextStart = $start;
                // Trims off '$startDiff' number of characters to bring the previous
                // trailing context back to the start of this current match zone.
                $keepLength = max(0, mb_strlen($pendingTail) + $startDiff);
                $pendingTail = mb_substr($pendingTail, 0, $keepLength);
                $contentTextLength += $startDiff;
            }

            // The previous trailing context is now final, so escape and commit it.
            $content .= e($pendingTail);
            $pendingTail = '';

            // Add ellipsis between results
            if (!$fetchAll && $contextStart !== 0 && $contextStart !== $start) {
                $content .= ' ...';
                $contentTextLength += 4;
            } elseif ($fetchAll) {
                // Or fill in gap since the previous match
                $fillLength = $contextStart - $lastEnd;
                $content .= e(mb_substr($originalText, $lastEnd, $fillLength));
                $contentTextLength += $fillLength;
            }

            // Add our content including the bolded matching text
            $content .= e(mb_substr($originalText, $contextStart, $start - $contextStart));
            $contentTextLength += $start - $contextStart;
            $content .= '<strong>' . e(mb_substr($originalText, $start, $end - $start)) . '</strong>';
            $contentTextLength += $end - $start;
            $pendingTail = mb_substr($originalText, $end, $contextEnd - $end);
            $contentTextLength += $contextEnd - $end;

            // Update our last end position
            $lastEnd = $contextEnd;

            // Update the first start position if it's not already been set
            if (is_null($firstStart)) {
                $firstStart = $contextStart;
            }

            // Stop if we're near our target. Never applies when there's no limit,
            // otherwise short texts would stop after their first match.
            if (!$fetchAll && $contentTextLength >= $targetLength - 10) {
                break;
            }
        }

        // Commit the trailing context of the final match handled.
        $content .= e($pendingTail);

        // Just copy out the content if we haven't moved along anywhere.
        if ($lastEnd === 0) {
            $content = e(mb_substr($originalText, 0, $targetLength));
            $contentTextLength = $targetLength;
            $lastEnd = $targetLength;
        }

        // Pad out the end if we're low. With no limit, always run on to the end of the
        // text, regardless of how short the remaining part is.
        $remainder = $targetLength - $contentTextLength;
        if ($remainder > 10 || ($fetchAll && $lastEnd < $maxEnd)) {
            $padEndLength = min($maxEnd - $lastEnd, $remainder);
            $content .= e(mb_substr($originalText, $lastEnd, $padEndLength));
            $lastEnd += $padEndLength;
            $contentTextLength += $padEndLength;
        }

        // Pad out the start if we're still low
        $remainder = $targetLength - $contentTextLength;
        $firstStart = $firstStart ?: 0;
        if (!$fetchAll && $remainder > 10 && $firstStart !== 0) {
            $padStart = max(0, $firstStart - $remainder);
            // A non-zero first start means the content begins with the 4-character ' ...'
            // marker, which is swapped here for the extra leading text.
            $content = ($padStart === 0 ? '' : '...') . e(mb_substr($originalText, $padStart, $firstStart - $padStart)) . mb_substr($content, 4);
        }

        // Add ellipsis if we're not at the end
        if ($lastEnd < $maxEnd) {
            $content .= '...';
        }

        return $content;
    }
}
|