FIX: Exclude automatic anchors from search index (#16396)

This commit is contained in:
Penar Musaraj 2022-04-06 16:06:45 -04:00 committed by GitHub
parent 6e9a068e44
commit df10a27067
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 0 deletions

View File

@ -337,6 +337,10 @@ class SearchIndexer
if node["href"] == node.text || MENTION_CLASSES.include?(node["class"]) if node["href"] == node.text || MENTION_CLASSES.include?(node["class"])
node.remove_attribute("href") node.remove_attribute("href")
end end
# Autogenerated anchors (class "anchor" with a fragment-only href such as
# "#section-title") should not contribute their href to the search index.
# The safe navigation (&.) matters: the check just above may already have
# removed "href" from this very node, in which case node["href"] is nil and
# an unguarded call would raise NoMethodError.
if node["class"] == "anchor" && node["href"]&.starts_with?("#")
  node.remove_attribute("href")
end
end end
html_scrubber = new html_scrubber = new

View File

@ -42,6 +42,12 @@ describe SearchIndexer do
expect(scrubbed).to eq("http://meta.discourse.org/ link") expect(scrubbed).to eq("http://meta.discourse.org/ link")
end end
# An anchor link carrying class "anchor" and a fragment-only href must be
# reduced to its visible text; the "#..." target is not part of the output.
it 'ignores autogenerated link anchors' do
  anchor_markup = "<a class='anchor' href='#something-special'>something special</a>"

  expect(SearchIndexer::HtmlScrubber.scrub(anchor_markup)).to eq("something special")
end
it 'extracts @username from mentions' do it 'extracts @username from mentions' do
html = '<p><a class="mention" href="/u/%E7%8B%AE%E5%AD%90">@狮子</a> <a class="mention" href="/u/foo">@foo</a></p>' html = '<p><a class="mention" href="/u/%E7%8B%AE%E5%AD%90">@狮子</a> <a class="mention" href="/u/foo">@foo</a></p>'
scrubbed = SearchIndexer::HtmlScrubber.scrub(html) scrubbed = SearchIndexer::HtmlScrubber.scrub(html)