mirror of
https://github.com/discourse/discourse.git
synced 2025-03-20 18:35:35 +08:00
FIX: Relevance search will now consider document length in ranking.
The default ranking option ranks by the number of matches, which is highly problematic when posts are stuffed with a keyword. The rank is now divided by the document length, which is a much fairer way to rank.
This commit is contained in:
parent
cadd1d670f
commit
e87ca59401
@ -838,13 +838,14 @@ class Search
|
||||
posts = posts.order("posts.like_count DESC")
|
||||
end
|
||||
else
|
||||
# 0|32 default normalization scaled into the range zero to one
|
||||
# 2|32 divides the rank by the document length and scales the range from
|
||||
# zero to one
|
||||
data_ranking = <<~SQL
|
||||
(
|
||||
TS_RANK_CD(
|
||||
post_search_data.search_data,
|
||||
#{ts_query(weight_filter: weights)},
|
||||
0|32
|
||||
2|32
|
||||
) *
|
||||
(
|
||||
CASE categories.search_priority
|
||||
|
@ -334,6 +334,27 @@ describe Search do
|
||||
expect(result.posts).to contain_exactly(reply)
|
||||
expect(result.blurb(reply)).to eq(expected_blurb)
|
||||
end
|
||||
|
||||
it 'does not allow a post with repeated words to dominate the ranking' do
|
||||
category = Fabricate(:category, name: "winter is coming")
|
||||
|
||||
post = Fabricate(:post,
|
||||
raw: "I think winter will end soon",
|
||||
topic: Fabricate(:topic,
|
||||
title: "dragon john snow winter",
|
||||
category: category
|
||||
)
|
||||
)
|
||||
|
||||
post2 = Fabricate(:post,
|
||||
raw: "I think winter winter winter winter winter will end soon",
|
||||
topic: Fabricate(:topic, title: "dragon john snow summer", category: category)
|
||||
)
|
||||
|
||||
result = Search.execute('winter')
|
||||
|
||||
expect(result.posts).to eq([post, post2, category.topic.first_post])
|
||||
end
|
||||
end
|
||||
|
||||
context 'searching for quoted title' do
|
||||
@ -940,22 +961,45 @@ describe Search do
|
||||
today = Date.today
|
||||
yesterday = 1.day.ago
|
||||
two_days_ago = 2.days.ago
|
||||
category = Fabricate(:category)
|
||||
|
||||
old_topic = Fabricate(:topic,
|
||||
title: 'First Topic, testing the created_at sort',
|
||||
created_at: two_days_ago,
|
||||
category: category
|
||||
)
|
||||
|
||||
old_topic = Fabricate(:topic,
|
||||
title: 'First Topic, testing the created_at sort',
|
||||
created_at: two_days_ago)
|
||||
latest_topic = Fabricate(:topic,
|
||||
title: 'Second Topic, testing the created_at sort',
|
||||
created_at: yesterday)
|
||||
title: 'Second Topic, testing the created_at sort',
|
||||
created_at: yesterday,
|
||||
category: category
|
||||
)
|
||||
|
||||
old_relevant_topic_post = Fabricate(:post, topic: old_topic, created_at: yesterday, raw: 'Relevant Topic')
|
||||
latest_irelevant_topic_post = Fabricate(:post, topic: latest_topic, created_at: today, raw: 'Not Relevant')
|
||||
old_relevant_topic_post = Fabricate(:post,
|
||||
topic: old_topic,
|
||||
created_at: yesterday,
|
||||
raw: 'Relevant Relevant Topic'
|
||||
)
|
||||
|
||||
latest_irelevant_topic_post = Fabricate(:post,
|
||||
topic: latest_topic,
|
||||
created_at: today,
|
||||
raw: 'Not Relevant'
|
||||
)
|
||||
|
||||
# Expecting the default results
|
||||
expect(Search.execute('Topic').posts.map(&:id)).to eq([old_relevant_topic_post.id, latest_irelevant_topic_post.id])
|
||||
expect(Search.execute('Topic').posts).to contain_exactly(
|
||||
old_relevant_topic_post,
|
||||
latest_irelevant_topic_post,
|
||||
category.topic.first_post
|
||||
)
|
||||
|
||||
# Expecting the ordered by topic creation results
|
||||
expect(Search.execute('Topic order:latest_topic').posts.map(&:id)).to eq([latest_irelevant_topic_post.id, old_relevant_topic_post.id])
|
||||
expect(Search.execute('Topic order:latest_topic').posts).to contain_exactly(
|
||||
latest_irelevant_topic_post,
|
||||
old_relevant_topic_post,
|
||||
category.topic.first_post
|
||||
)
|
||||
end
|
||||
|
||||
it 'can tokenize dots' do
|
||||
|
Loading…
x
Reference in New Issue
Block a user