mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 02:19:27 +08:00
FIX: search indexer had various cases where it could fail
Prior to this fix, if a post contained the text www.test.com/abc it would fail to index. This also simplifies the rules to avoid full URL parsing, which can be expensive.
This commit is contained in:
parent
bb98785569
commit
6428aa5b1f
|
@ -21,16 +21,13 @@ class SearchIndexer
|
|||
# insert some extra words for I.am.a.word so "word" is tokenized
|
||||
# I.am.a.word becomes I.am.a.word am a word
|
||||
raw.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot|
|
||||
if with_dot.match?(PlainTextToMarkdown::URL_REGEX)
|
||||
"#{with_dot} #{URI.parse(with_dot).hostname.gsub('.', ' ')}"
|
||||
else
|
||||
split = with_dot.split(".")
|
||||
|
||||
if split.length > 1
|
||||
with_dot + ((+" ") << split[1..-1].join(" "))
|
||||
else
|
||||
with_dot
|
||||
end
|
||||
split = with_dot.split(/https?:\/\/|[?:;,.\/]/)
|
||||
|
||||
if split.length > 1
|
||||
with_dot + ((+" ") << split[1..-1].reject { |x| x.blank? }.join(" "))
|
||||
else
|
||||
with_dot
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -17,6 +17,16 @@ describe SearchIndexer do
|
|||
SearchIndexer.scrub_html_for_search(html, strip_diacritics: strip_diacritics)
|
||||
end
|
||||
|
||||
it 'can correctly inject if http or https links exist' do
|
||||
|
||||
val = "a https://cnn.com?bob=1, http://stuff.com.au?bill=1 b abc.net/xyz=1"
|
||||
result = SearchIndexer.inject_extra_terms(val)
|
||||
|
||||
expected = "a https://cnn.com?bob=1, cnn com bob=1 http://stuff.com.au?bill=1 stuff com au bill=1 b abc.net/xyz=1 net xyz=1"
|
||||
|
||||
expect(result).to eq(expected)
|
||||
end
|
||||
|
||||
it 'correctly indexes chinese' do
|
||||
SiteSetting.default_locale = 'zh_CN'
|
||||
data = "你好世界"
|
||||
|
@ -141,7 +151,7 @@ describe SearchIndexer do
|
|||
topic = post.topic
|
||||
|
||||
expect(post.post_search_data.raw_data).to eq(
|
||||
"#{topic.title} #{topic.category.name} https://meta.discourse.org/some.png meta discourse org"
|
||||
"#{topic.title} #{topic.category.name} https://meta.discourse.org/some.png meta discourse org some png"
|
||||
)
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user