From ae520b62e4a1aff3a4f025700076ea6594855857 Mon Sep 17 00:00:00 2001 From: Sam Saffron Date: Thu, 25 Jun 2020 13:36:52 +1000 Subject: [PATCH] FEATURE: allow disabling of extra term injection in search There is a feature in search where we take over from the tokenizer in Postgres and attempt to inject more words into search. For example, sam.i.am will inject the words i and am. This is not ideal because there are many edge cases and this can cause extreme index bloat. This is an opening move commit to make it configurable; over the next few weeks we will evaluate and decide whether to disable this by default or simply remove it. --- app/services/search_indexer.rb | 2 ++ config/site_settings.yml | 3 +++ 2 files changed, 5 insertions(+) diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index ceeaaf27f25..09193fff094 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -17,6 +17,8 @@ class SearchIndexer end def self.inject_extra_terms(raw) + return raw if !SiteSetting.search_inject_extra_terms + # insert some extra words for I.am.a.word so "word" is tokenized # I.am.a.word becomes I.am.a.word am a word raw.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot| diff --git a/config/site_settings.yml b/config/site_settings.yml index e8cd6f7d7aa..079232c0a93 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -1730,6 +1730,9 @@ backups: hidden: true search: + search_inject_extra_terms: + default: true + hidden: true min_search_term_length: client: true default: 3