From 7d0724e2888f768149b425efcdc185a1c7a4be02 Mon Sep 17 00:00:00 2001 From: Dan Brown <ssddanbrown@googlemail.com> Date: Fri, 12 Nov 2021 18:03:44 +0000 Subject: [PATCH] Added auto-conversion of search terms to exact values Will occur when a search term contains a character that's used to split content into search terms. Added testing to cover. --- app/Entities/Tools/SearchIndex.php | 8 ++++- app/Entities/Tools/SearchOptions.php | 47 +++++++++++++++++++++++----- tests/Entity/EntitySearchTest.php | 12 +++++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/app/Entities/Tools/SearchIndex.php b/app/Entities/Tools/SearchIndex.php index 05de341f9..3c4b5a247 100644 --- a/app/Entities/Tools/SearchIndex.php +++ b/app/Entities/Tools/SearchIndex.php @@ -13,6 +13,12 @@ use Illuminate\Support\Collection; class SearchIndex { + /** + * A list of delimiter characters used to break up parsed content into terms for indexing. + * + * @var string + */ + public static $delimiters = " \n\t.,!?:;()[]{}<>`'\""; /** * @var EntityProvider @@ -189,7 +195,7 @@ class SearchIndex protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = " \n\t.,!?:;()[]{}<>`'\""; + $splitChars = static::$delimiters; $token = strtok($text, $splitChars); while ($token !== false) { diff --git a/app/Entities/Tools/SearchOptions.php b/app/Entities/Tools/SearchOptions.php index 39074fb38..9f1b9742d 100644 --- a/app/Entities/Tools/SearchOptions.php +++ b/app/Entities/Tools/SearchOptions.php @@ -57,15 +57,22 @@ class SearchOptions $instance = new SearchOptions(); $inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']); - $instance->searches = explode(' ', $inputs['search'] ?? []); - $instance->exacts = array_filter($inputs['exact'] ?? []); + + $parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? 
''); + $instance->searches = $parsedStandardTerms['terms']; + $instance->exacts = $parsedStandardTerms['exacts']; + + array_push($instance->exacts, ...array_filter($inputs['exact'] ?? [])); + $instance->tags = array_filter($inputs['tags'] ?? []); + foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) { if (empty($filterVal)) { continue; } $instance->filters[$filterKey] = $filterVal === 'true' ? '' : $filterVal; } + if (isset($inputs['types']) && count($inputs['types']) < 4) { $instance->filters['type'] = implode('|', $inputs['types']); } @@ -102,11 +109,9 @@ class SearchOptions } // Parse standard terms - foreach (explode(' ', trim($searchString)) as $searchTerm) { - if ($searchTerm !== '') { - $terms['searches'][] = $searchTerm; - } - } + $parsedStandardTerms = static::parseStandardTermString($searchString); + array_push($terms['searches'], ...$parsedStandardTerms['terms']); + array_push($terms['exacts'], ...$parsedStandardTerms['exacts']); // Split filter values out $splitFilters = []; @@ -119,6 +124,34 @@ class SearchOptions return $terms; } + + /** + * Parse a standard search term string into individual search terms and + * extract any exact-term searches to be made. + * + * @return array{terms: array<string>, exacts: array<string>} + */ + protected static function parseStandardTermString(string $termString): array + { + $terms = explode(' ', $termString); + $indexDelimiters = SearchIndex::$delimiters; + $parsed = [ + 'terms' => [], + 'exacts' => [], + ]; + + foreach ($terms as $searchTerm) { + if ($searchTerm === '') { + continue; + } + + $parsedList = (strpbrk($searchTerm, $indexDelimiters) === false) ? 'terms' : 'exacts'; + $parsed[$parsedList][] = $searchTerm; + } + + return $parsed; + } + /** * Encode this instance to a search string. 
*/ diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index 08fabba0c..f69dba211 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -119,6 +119,18 @@ class EntitySearchTest extends TestCase $exactSearchB->assertStatus(200)->assertDontSee($page->name); } + public function test_search_terms_with_delimiters_are_converted_to_exact_matches() + { + $this->asEditor(); + $page = $this->newPage(['name' => 'Delimiter test', 'html' => '<p>1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`</p>']); + $terms = explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`'); + + foreach ($terms as $term) { + $search = $this->get('/search?term=' . urlencode($term)); + $search->assertSee($page->name); + } + } + public function test_search_filters() { $page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']);