Added auto-conversion of search terms to exact values

This conversion occurs when a search term contains a character that's used to
split content into search terms during indexing.
Added tests to cover this behaviour.
This commit is contained in:
Dan Brown 2021-11-12 18:03:44 +00:00
parent 99587a0be6
commit 7d0724e288
No known key found for this signature in database
GPG Key ID: 46D9F943C24A2EF9
3 changed files with 59 additions and 8 deletions

@ -13,6 +13,12 @@ use Illuminate\Support\Collection;
class SearchIndex class SearchIndex
{ {
/**
* A list of delimiter characters used to break-up parsed content into terms for indexing.
*
* @var string
*/
public static $delimiters = " \n\t.,!?:;()[]{}<>`'\"";
/** /**
* @var EntityProvider * @var EntityProvider
@ -189,7 +195,7 @@ class SearchIndex
protected function textToTermCountMap(string $text): array protected function textToTermCountMap(string $text): array
{ {
$tokenMap = []; // {TextToken => OccurrenceCount} $tokenMap = []; // {TextToken => OccurrenceCount}
$splitChars = " \n\t.,!?:;()[]{}<>`'\""; $splitChars = static::$delimiters;
$token = strtok($text, $splitChars); $token = strtok($text, $splitChars);
while ($token !== false) { while ($token !== false) {

@ -57,15 +57,22 @@ class SearchOptions
$instance = new SearchOptions(); $instance = new SearchOptions();
$inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']); $inputs = $request->only(['search', 'types', 'filters', 'exact', 'tags']);
$instance->searches = explode(' ', $inputs['search'] ?? []);
$instance->exacts = array_filter($inputs['exact'] ?? []); $parsedStandardTerms = static::parseStandardTermString($inputs['search'] ?? '');
$instance->searches = $parsedStandardTerms['terms'];
$instance->exacts = $parsedStandardTerms['exacts'];
array_push($instance->exacts, ...array_filter($inputs['exact'] ?? []));
$instance->tags = array_filter($inputs['tags'] ?? []); $instance->tags = array_filter($inputs['tags'] ?? []);
foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) { foreach (($inputs['filters'] ?? []) as $filterKey => $filterVal) {
if (empty($filterVal)) { if (empty($filterVal)) {
continue; continue;
} }
$instance->filters[$filterKey] = $filterVal === 'true' ? '' : $filterVal; $instance->filters[$filterKey] = $filterVal === 'true' ? '' : $filterVal;
} }
if (isset($inputs['types']) && count($inputs['types']) < 4) { if (isset($inputs['types']) && count($inputs['types']) < 4) {
$instance->filters['type'] = implode('|', $inputs['types']); $instance->filters['type'] = implode('|', $inputs['types']);
} }
@ -102,11 +109,9 @@ class SearchOptions
} }
// Parse standard terms // Parse standard terms
foreach (explode(' ', trim($searchString)) as $searchTerm) { $parsedStandardTerms = static::parseStandardTermString($searchString);
if ($searchTerm !== '') { array_push($terms['searches'], ...$parsedStandardTerms['terms']);
$terms['searches'][] = $searchTerm; array_push($terms['exacts'], ...$parsedStandardTerms['exacts']);
}
}
// Split filter values out // Split filter values out
$splitFilters = []; $splitFilters = [];
@ -119,6 +124,34 @@ class SearchOptions
return $terms; return $terms;
} }
/**
 * Split a space-separated term string into plain search terms and
 * exact-match terms.
 *
 * A term is classed as "exact" when it contains at least one of the
 * characters in SearchIndex::$delimiters; otherwise it is kept as a
 * standard term. Empty segments (e.g. from repeated spaces) are dropped.
 *
 * @return array{terms: array<string>, exacts: array<string>}
 */
protected static function parseStandardTermString(string $termString): array
{
    $result = [
        'terms'  => [],
        'exacts' => [],
    ];
    $delimiterChars = SearchIndex::$delimiters;

    foreach (explode(' ', $termString) as $candidate) {
        // Skip the empty strings produced by leading/trailing/double spaces.
        if ($candidate === '') {
            continue;
        }

        // strpbrk() returns false only when none of the delimiter characters are present.
        if (strpbrk($candidate, $delimiterChars) !== false) {
            $result['exacts'][] = $candidate;
        } else {
            $result['terms'][] = $candidate;
        }
    }

    return $result;
}
/** /**
* Encode this instance to a search string. * Encode this instance to a search string.
*/ */

@ -119,6 +119,18 @@ class EntitySearchTest extends TestCase
$exactSearchB->assertStatus(200)->assertDontSee($page->name); $exactSearchB->assertStatus(200)->assertDontSee($page->name);
} }
/**
 * Each term below contains a delimiter character; searching for any one of
 * them on its own should still show the page whose content contains it.
 */
public function test_search_terms_with_delimiters_are_converted_to_exact_matches()
{
    $this->asEditor();

    $page = $this->newPage([
        'name' => 'Delimiter test',
        'html' => '<p>1.1 2,2 3?3 4:4 5;5 (8) &lt;9&gt; "10" \'11\' `12`</p>',
    ]);

    // Plain-text equivalents of the terms present in the page HTML above.
    $delimitedTerms = explode(' ', '1.1 2,2 3?3 4:4 5;5 (8) <9> "10" \'11\' `12`');

    foreach ($delimitedTerms as $delimitedTerm) {
        $response = $this->get('/search?term=' . urlencode($delimitedTerm));
        $response->assertSee($page->name);
    }
}
public function test_search_filters() public function test_search_filters()
{ {
$page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']); $page = $this->newPage(['name' => 'My new test quaffleachits', 'html' => 'this is about an orange donkey danzorbhsing']);