diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index 88aec292ed8..b3c8a8b5ff3 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -213,7 +213,9 @@ class SearchIndexer end document.css("a[href]").each do |node| - node.remove_attribute("href") if node["href"] == node.text + if node["href"] == node.text || MENTION_CLASSES.include?(node["class"]) + node.remove_attribute("href") + end end me = new(strip_diacritics: strip_diacritics) @@ -221,6 +223,7 @@ class SearchIndexer me.scrubbed.squish end + MENTION_CLASSES ||= %w{mention mention-group} ATTRIBUTES ||= %w{alt title href data-youtube-title} def start_element(_name, attributes = []) diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index faeda4bd5c0..0bbe398ab66 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -38,6 +38,18 @@ describe SearchIndexer do expect(scrubbed).to eq("http://meta.discourse.org/ link") end + it 'extracts @username from mentions' do + html = '
' + scrubbed = scrub(html) + expect(scrubbed).to eq('@狮子 @foo') + end + + it 'extracts @groupname from group mentions' do + html = '' + scrubbed = scrub(html) + expect(scrubbed).to eq('@автомобилист') + end + it 'uses ignore_accent setting to strip diacritics' do html = "HELLO Hétérogénéité Здравствуйте هتاف للترحيب 你好
"