From 053cbe3112a9074439e4ed42c53a0e901b0afe1f Mon Sep 17 00:00:00 2001
From: Guo Xiang Tan
Date: Fri, 7 Aug 2020 14:36:12 +0800
Subject: [PATCH] PERF: Limit characters used to generate headline for search blurb.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We determined using the following benchmark script that limiting to 2500 chars
would mean a maximum of 25ms spent generating headlines.

```
require 'benchmark/ips'

string = <<~STRING
Far far away, behind the word mountains...
STRING

def sql_excerpt(string, l = 1000000)
  DB.query_single(<<~SQL)
  SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
  SQL
end

def ruby_excerpt(string)
  output = DB.query_single("SELECT '#{string}'")[0]
  Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100)
end

puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts

Benchmark.ips do |x|
  x.time = 10

  [1000, 2500, 5000, 10000, 20000, 50000].each do |l|
    short_string = string[0..l]

    x.report("ts_headline excerpt #{l}") do
      sql_excerpt(short_string, l)
    end

    x.report("actionview excerpt #{l}") do
      ruby_excerpt(short_string)
    end
  end

  x.compare!
end
```

```
actionview excerpt 1000: 20570.7 i/s
actionview excerpt 2500: 17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000: 14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000: 6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000: 4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000: 3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500: 2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000: 1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000: 667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000: 348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000: 131.9 i/s - 155.91x (± 0.00) slower
```
---
 lib/search.rb                  |  6 +++++-
 spec/components/search_spec.rb | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/lib/search.rb b/lib/search.rb
index fe19f17937e..16b813d8587 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -1164,6 +1164,10 @@ class Search
     query.includes(topic: topic_eager_loads)
   end
 
+  # Limited for performance reasons since `TS_HEADLINE` is slow when the text
+  # document is too long.
+  MAX_LENGTH_FOR_HEADLINE = 2500
+
   def posts_scope(default_scope = Post.all)
     if SiteSetting.use_pg_headlines_for_excerpt
       search_term = @term.present? ? PG::Connection.escape_string(@term) : nil
@@ -1174,7 +1178,7 @@ class Search
         .joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
         .select(
           "TS_HEADLINE(#{ts_config}, t1.fancy_title, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'StartSel='''', StopSel=''''') AS topic_title_headline",
-          "TS_HEADLINE(#{ts_config}, pd.raw_data, PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel=''''') AS headline",
+          "TS_HEADLINE(#{ts_config}, LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}), PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'), 'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel=''''') AS headline",
           default_scope.arel.projections
         )
     else
diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb
index 85c0183b8f2..2588bc27e09 100644
--- a/spec/components/search_spec.rb
+++ b/spec/components/search_spec.rb
@@ -429,6 +429,20 @@ describe Search do
         expect(post.topic_title_headline).to eq(topic.fancy_title)
       end
 
+      it "it limits the headline to #{Search::MAX_LENGTH_FOR_HEADLINE} characters" do
+        SiteSetting.use_pg_headlines_for_excerpt = true
+
+        reply.update!(raw: "#{'a' * Search::MAX_LENGTH_FOR_HEADLINE} #{reply.raw}")
+
+        result = Search.execute('elephant')
+
+        expect(result.posts.map(&:id)).to contain_exactly(reply.id)
+
+        post = result.posts.first
+
+        expect(post.headline.include?('elephant')).to eq(false)
+      end
+
       it 'returns the right post and blurb for searches with phrase' do
         SiteSetting.use_pg_headlines_for_excerpt = true