diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index e294f41131a..66fb5fd71ae 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -21,7 +21,10 @@ class SearchIndexer foreign_key = "#{table}_id" # insert some extra words for I.am.a.word so "word" is tokenized - search_data = raw_data.gsub(/\p{L}*\.\p{L}*/) do |with_dot| + # I.am.a.word becomes I.am.a.word am a word + # uses \p{L} which matches a single code point in the letter category + # uses \p{N} which matches a single code point in the number category + search_data = raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot| split = with_dot.split(".") if split.length > 1 with_dot + (" " << split[1..-1].join(" ")) diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 0b858163954..2c2b3c32555 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -703,6 +703,11 @@ describe Search do expect(Search.execute('bill').posts.map(&:id)).to eq([post.id]) end + it 'can tokanize website names correctly' do + post = Fabricate(:post, raw: 'i like wb.camra.org.uk so yay') + expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id]) + end + it 'supports category slug and tags' do # main category category = Fabricate(:category, name: 'category 24', slug: 'category-24')