From 7d0724e2888f768149b425efcdc185a1c7a4be02 Mon Sep 17 00:00:00 2001
From: Dan Brown <ssddanbrown@googlemail.com>
Date: Fri, 12 Nov 2021 18:03:44 +0000
Subject: [PATCH] Added auto-conversion of search terms to exact values

Conversion occurs when a search term contains a character that's used
to split content into terms for indexing.
Added testing to cover this.
---
 app/Entities/Tools/SearchIndex.php   |  8 ++++-
 app/Entities/Tools/SearchOptions.php | 47 +++++++++++++++++++++++-----
 tests/Entity/EntitySearchTest.php    | 12 +++++++
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php
index 05de341f9..3c4b5a247 100644
--- a/app/Entities/Tools/SearchIndex.php
+++ b/app/Entities/Tools/SearchIndex.php
@@ -13,6 +13,12 @@ use Illuminate\Support\Collection;
 
 class SearchIndex
 {
+    /**
+     * Delimiter characters used to break up parsed content into terms for indexing (also read by SearchOptions).
+     *
+     * @var string
+     */
+    public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
 
     /**
      * @var EntityProvider
@@ -189,7 +195,7 @@ class SearchIndex
     protected function textToTermCountMap(string $text): array
     {
         $tokenMap = []; // {TextToken => OccurrenceCount}
-        $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
+        $splitChars = static::$delimiters;
         $token = strtok($text, $splitChars);
 
         while ($token !== false) {
diff --git a/app/Entities/Tools/SearchOptions.php b/app/Entities/Tools/SearchOptions.php
index 39074fb38..9f1b9742d 100644
--- a/app/Entities/Tools/SearchOptions.php
+++ b/app/Entities/Tools/SearchOptions.php
@@ -57,15 +57,22 @@ class SearchOptions
 
         $instance = new SearchOptions();
         $inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']);
-        $instance->searches = explode(' ', $inputs['search'] ?? []);
-        $instance->exacts = array_filter($inputs['exact'] ?? []);
+
+        $parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? '');
+        $instance->searches = $parsedStandardTerms['terms'];
+        $instance->exacts = $parsedStandardTerms['exacts'];
+
+        array_push($instance->exacts, ...array_filter($inputs['exact'] ?? []));
+
         $instance->tags = array_filter($inputs['tags'] ?? []);
+
         foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) {
             if (empty($filterVal)) {
                 continue;
             }
             $instance->filters[$filterKey] = $filterVal === 'true' ? '' : $filterVal;
         }
+
         if (isset($inputs['types']) && count($inputs['types']) < 4) {
             $instance->filters['type'] = implode('|', $inputs['types']);
         }
@@ -102,11 +109,9 @@ class SearchOptions
         }
 
         // Parse standard terms
-        foreach (explode(' ', trim($searchString)) as $searchTerm) {
-            if ($searchTerm !== '') {
-                $terms['searches'][] = $searchTerm;
-            }
-        }
+        $parsedStandardTerms = static::parseStandardTermString($searchString);
+        array_push($terms['searches'], ...$parsedStandardTerms['terms']);
+        array_push($terms['exacts'], ...$parsedStandardTerms['exacts']);
 
         // Split filter values out
         $splitFilters = [];
@@ -119,6 +124,34 @@ class SearchOptions
         return $terms;
     }
 
+
+    /**
+     * Split a space-separated term string into plain search terms and exact terms.
+     * A term containing any search index delimiter character is classed as exact.
+     *
+     * @return array{terms: array<string>, exacts: array<string>}
+     */
+    protected static function parseStandardTermString(string $termString): array
+    {
+        $plainTerms = [];
+        $exactTerms = [];
+
+        // Empty pieces, produced by repeated spaces, are ignored.
+        foreach (explode(' ', $termString) as $piece) {
+            if ($piece === '') {
+                continue;
+            }
+
+            if (strpbrk($piece, SearchIndex::$delimiters) === false) {
+                $plainTerms[] = $piece;
+            } else {
+                $exactTerms[] = $piece;
+            }
+        }
+
+        return ['terms' => $plainTerms, 'exacts' => $exactTerms];
+    }
+
     /**
      * Encode this instance to a search string.
      */
diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php
index 08fabba0c..f69dba211 100644
--- a/tests/Entity/EntitySearchTest.php
+++ b/tests/Entity/EntitySearchTest.php
@@ -119,6 +119,18 @@ class EntitySearchTest extends TestCase
         $exactSearchB->assertStatus(200)->assertDontSee($page->name);
     }
 
+    public function test_search_terms_with_delimiters_are_converted_to_exact_matches()
+    {
+        // Each term below holds an index delimiter and appears in the page content.
+        $this->asEditor();
+        $html = '<p>1.1 2,2 3?3 4:4 5;5 (8) &lt;9&gt; "10" \'11\' `12`</p>';
+        $page = $this->newPage(['name' => 'Delimiter test', 'html' => $html]);
+
+        foreach (explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`') as $term) {
+            $this->get('/search?term=' . urlencode($term))->assertSee($page->name);
+        }
+    }
+
     public function test_search_filters()
     {
         $page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']);