mirror of
https://github.com/discourse/discourse.git
synced 2025-02-16 23:12:45 +08:00
PERF: Use PG headlines for blurb generation and highlighting for search.
This commit is contained in:
parent
ba482c251c
commit
2193d02433
|
@ -1792,6 +1792,9 @@ backups:
|
|||
hidden: true
|
||||
|
||||
search:
|
||||
use_pg_headlines_for_excerpt:
|
||||
default: false
|
||||
hidden: true
|
||||
search_ranking_normalization:
|
||||
default: "0"
|
||||
hidden: true
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
class Search
|
||||
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
|
||||
HIGHLIGHT_CSS_CLASS = 'search-highlight'
|
||||
|
||||
cattr_accessor :preloaded_topic_custom_fields
|
||||
self.preloaded_topic_custom_fields = Set.new
|
||||
|
@ -726,12 +727,18 @@ class Search
|
|||
def single_topic(id)
|
||||
if @opts[:restrict_to_archetype].present?
|
||||
archetype = @opts[:restrict_to_archetype] == Archetype.default ? Archetype.default : Archetype.private_message
|
||||
post = Post.joins(:topic)
|
||||
.where("topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1", id: id, archetype: archetype)
|
||||
.first
|
||||
|
||||
post = posts_scope
|
||||
.joins(:topic)
|
||||
.find_by(
|
||||
"topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1",
|
||||
id: id,
|
||||
archetype: archetype
|
||||
)
|
||||
else
|
||||
post = Post.find_by(topic_id: id, post_number: 1)
|
||||
post = posts_scope.find_by(topic_id: id, post_number: 1)
|
||||
end
|
||||
|
||||
return nil unless @guardian.can_see?(post)
|
||||
|
||||
@results.add(post)
|
||||
|
@ -1096,7 +1103,7 @@ class Search
|
|||
def aggregate_posts(post_sql)
|
||||
return [] unless post_sql
|
||||
|
||||
posts_eager_loads(Post)
|
||||
posts_scope(posts_eager_loads(Post))
|
||||
.joins("JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number")
|
||||
.order('row_number')
|
||||
end
|
||||
|
@ -1128,7 +1135,7 @@ class Search
|
|||
|
||||
def topic_search
|
||||
if @search_context.is_a?(Topic)
|
||||
posts = posts_eager_loads(posts_query(limit))
|
||||
posts = posts_scope(posts_eager_loads(posts_query(limit)))
|
||||
.where('posts.topic_id = ?', @search_context.id)
|
||||
|
||||
posts.each do |post|
|
||||
|
@ -1150,4 +1157,17 @@ class Search
|
|||
query.includes(topic: topic_eager_loads)
|
||||
end
|
||||
|
||||
# Optionally decorates a posts relation with a PostgreSQL-generated search
# excerpt.
#
# When the `use_pg_headlines_for_excerpt` site setting is enabled, the
# relation is joined against `post_search_data` and an extra `headline`
# column is selected. It holds a TS_HEADLINE fragment of `pd.raw_data` with
# matches wrapped in `<span class="HIGHLIGHT_CSS_CLASS">`. When the setting
# is disabled the relation is returned untouched.
#
# @param default_scope the posts relation to decorate (defaults to Post.all)
# @return the decorated relation, or `default_scope` itself when the
#   setting is off
def posts_scope(default_scope = Post.all)
  return default_scope unless SiteSetting.use_pg_headlines_for_excerpt

  # Use one text search configuration for both the headline and the query.
  # Without an explicit config PLAINTO_TSQUERY falls back to the database's
  # default_text_search_config, which may stem terms differently from the
  # config TS_HEADLINE uses and so fail to highlight matches.
  # NOTE(review): assumes default_ts_config yields an SQL-ready regconfig
  # expression, as it is interpolated unquoted — confirm against its definition.
  ts_config = default_ts_config

  # The term is interpolated into a quoted SQL literal, so it must be escaped.
  # A blank term interpolates as an empty string.
  search_term = @term.present? ? PG::Connection.escape_string(@term) : nil

  # Single quotes are doubled because this whole string is itself embedded
  # inside a single-quoted SQL literal below.
  headline_options = "ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''"

  default_scope
    .joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
    .select(
      "TS_HEADLINE(#{ts_config}, pd.raw_data, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), '#{headline_options}') AS headline",
      # Keep whatever the incoming relation was already projecting.
      default_scope.arel.projections
    )
end
|
||||
|
||||
end
|
||||
|
|
|
@ -85,8 +85,12 @@ class Search
|
|||
}
|
||||
|
||||
if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
|
||||
opts[:cooked] = post.post_search_data.raw_data
|
||||
opts[:scrub] = false
|
||||
if SiteSetting.use_pg_headlines_for_excerpt
|
||||
return post.headline
|
||||
else
|
||||
opts[:cooked] = post.post_search_data.raw_data
|
||||
opts[:scrub] = false
|
||||
end
|
||||
else
|
||||
opts[:cooked] = post.cooked
|
||||
end
|
||||
|
|
|
@ -410,27 +410,31 @@ describe Search do
|
|||
end
|
||||
|
||||
let(:expected_blurb) do
|
||||
"...quire content longer than the typical test post raw content. It really is some long content, folks. elephant"
|
||||
"hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class=\"search-highlight\">elephant</span>"
|
||||
end
|
||||
|
||||
it 'returns the post' do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
result = Search.execute('elephant',
|
||||
type_filter: 'topic',
|
||||
include_blurbs: true
|
||||
)
|
||||
|
||||
expect(result.posts).to contain_exactly(reply)
|
||||
expect(result.blurb(reply)).to eq(expected_blurb)
|
||||
expect(result.posts.map(&:id)).to contain_exactly(reply.id)
|
||||
expect(result.blurb(result.posts.first)).to eq(expected_blurb)
|
||||
end
|
||||
|
||||
it 'returns the right post and blurb for searches with phrase' do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
result = Search.execute('"elephant"',
|
||||
type_filter: 'topic',
|
||||
include_blurbs: true
|
||||
)
|
||||
|
||||
expect(result.posts).to contain_exactly(reply)
|
||||
expect(result.blurb(reply)).to eq(expected_blurb)
|
||||
expect(result.posts.map(&:id)).to contain_exactly(reply.id)
|
||||
expect(result.blurb(result.posts.first)).to eq(expected_blurb)
|
||||
end
|
||||
|
||||
it 'applies a small penalty to closed topic when ranking' do
|
||||
|
|
|
@ -99,6 +99,8 @@ describe SearchController do
|
|||
end
|
||||
|
||||
it "can search correctly" do
|
||||
SiteSetting.use_pg_headlines_for_excerpt = true
|
||||
|
||||
get "/search/query.json", params: {
|
||||
term: 'awesome'
|
||||
}
|
||||
|
@ -109,11 +111,11 @@ describe SearchController do
|
|||
|
||||
expect(data['posts'].length).to eq(2)
|
||||
expect(data['posts'][0]['id']).to eq(awesome_post_2.id)
|
||||
expect(data['posts'][0]['blurb']).to eq(awesome_post_2.raw)
|
||||
expect(data['posts'][0]['blurb']).to eq("this is my really <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">awesome</span> post")
|
||||
expect(data['topics'][0]['id']).to eq(awesome_post_2.topic_id)
|
||||
|
||||
expect(data['posts'][1]['id']).to eq(awesome_post.id)
|
||||
expect(data['posts'][1]['blurb']).to eq(awesome_post.raw)
|
||||
expect(data['posts'][1]['blurb']).to eq("this is my really <span class=\"#{Search::HIGHLIGHT_CSS_CLASS}\">awesome</span> post")
|
||||
expect(data['topics'][1]['id']).to eq(awesome_post.topic_id)
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user