2019-05-03 06:17:27 +08:00
|
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
2013-05-23 02:36:14 +08:00
|
|
|
|
class Search
|
2018-10-23 09:10:33 +08:00
|
|
|
|
# Matches one code point from the Unicode combining-mark ranges
# U+0300-036F, U+1AB0-1AFF, U+1DC0-1DFF and U+20D0-20FF; the mark is captured as group 1.
# `||=` leaves an already-defined constant untouched.
DIACRITICS ||= /([\u0300-\u036f\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF])/

# CSS class name; presumably attached to highlighted search terms (usage is outside this chunk).
HIGHLIGHT_CSS_CLASS = "search-highlight".freeze
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
2019-03-18 01:46:09 +08:00
|
|
|
|
# Class-level Set; #execute passes it to Topic.preload_custom_fields for the topics
# of the returned posts whenever it is non-empty.
cattr_accessor :preloaded_topic_custom_fields
self.preloaded_topic_custom_fields = Set.new
|
|
|
|
|
|
2020-12-18 00:29:10 +08:00
|
|
|
|
# Registers a block that Search.preload will call with (results, search).
# Blocks are stored in a Set, so registering the same Proc object twice keeps one entry.
# Returns the Set of registered blocks.
def self.on_preload(&blk)
  (@preload ||= Set.new) << blk
end
|
|
|
|
|
|
|
|
|
|
# Invokes every block registered through Search.on_preload with (results, object).
# Does nothing and returns nil when no block has been registered.
def self.preload(results, object)
  @preload&.each { |callback| callback.call(results, object) }
end
|
|
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
|
# Base result count used by #limit when the full-page limit does not apply.
def self.per_facet
  5
end
|
|
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
|
# Page size used by #limit (full-page searches) and by #offset for pagination.
def self.per_filter
  50
end
|
|
|
|
|
|
2013-02-06 03:16:51 +08:00
|
|
|
|
# Names of the known search facets / type filters, as strings.
def self.facets
  %w[topic category user private_messages tags all_topics exclude_topics]
end
|
|
|
|
|
|
2017-08-01 03:28:48 +08:00
|
|
|
|
# Maps a locale such as "pt_BR" to the name of a PostgreSQL text search configuration.
#
# Only the part before the first "_" is considered. Unknown, empty or nil locales
# fall back to "simple".
#
# locale - locale identifier (defaults to SiteSetting.default_locale)
#
# Returns a String.
def self.ts_config(locale = SiteSetting.default_locale)
  # if adding a text search configuration, you should check PG beforehand:
  # SELECT cfgname FROM pg_ts_config;
  # As an aside, dictionaries can be listed by `\dFd`, the
  # physical locations are in /usr/share/postgresql/<version>/tsearch_data.
  # But it may not appear there based on pg extension configuration.
  # base docker config
  #
  # Compared as a string so that arbitrary input is never turned into a symbol.
  case locale.to_s.split("_").first
  when "da" then "danish"
  when "nl" then "dutch"
  when "en" then "english"
  when "fi" then "finnish"
  when "fr" then "french"
  when "de" then "german"
  when "hu" then "hungarian"
  when "it" then "italian"
  when "nb" then "norwegian"
  when "pt" then "portuguese"
  when "ro" then "romanian"
  when "ru" then "russian"
  when "es" then "spanish"
  when "sv" then "swedish"
  when "tr" then "turkish"
  else
    "simple" # use the 'simple' stemmer for other languages
  end
end
|
|
|
|
|
|
2022-03-08 05:03:10 +08:00
|
|
|
|
# Wraps a SQL expression in `unaccent(...)` when SiteSetting.search_ignore_accents is
# enabled; otherwise returns the expression unchanged.
#
# str - SQL fragment (it is interpolated verbatim, not quoted).
def self.wrap_unaccent(str)
  SiteSetting.search_ignore_accents ? "unaccent(#{str})" : str
end
|
|
|
|
|
|
2022-01-26 15:24:11 +08:00
|
|
|
|
# Truthy when the default locale is zh_TW or zh_CN, or when
# SiteSetting.search_tokenize_chinese is enabled.
def self.segment_chinese?
  %w[zh_TW zh_CN].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese
end
|
|
|
|
|
|
|
|
|
|
# Truthy when the default locale is "ja", or when
# SiteSetting.search_tokenize_japanese is enabled.
def self.segment_japanese?
  SiteSetting.default_locale == "ja" || SiteSetting.search_tokenize_japanese
end
|
|
|
|
|
|
|
|
|
|
# Memoized Regexp matching a single punctuation character; .prepare_data replaces
# every match with a space before Japanese segmentation.
#
# Square brackets are escaped: an unescaped "[" inside a character class opens a
# nested class, which leaves the outer class unterminated and the pattern invalid.
# Full-width forms are spelled as \u escapes so they survive Unicode normalisation of
# this file. NOTE(review): they are restored on the assumption that the doubled ASCII
# characters in the previous pattern were collapsed full-width forms - confirm against
# the upstream tiny_segmenter list.
def self.japanese_punctuation_regexp
  # Regexp adapted from https://github.com/6/tiny_segmenter/blob/15a5b825993dfd2c662df3766f232051716bef5b/lib/tiny_segmenter.rb#L7
  @japanese_punctuation_regexp ||=
    Regexp.compile(
      "[-–—―.。・\uFF08\uFF09()\uFF3B\uFF3D\\[\\]\uFF5B\uFF5D{}【】⟨⟩、\uFF64,\uFF0C،…‥〽「」『』〜~" \
        "\uFF01!\uFF1A:\uFF1F?\"'|_\uFF3F“”‘’;/⁄\uFF0F«»]",
    )
end
|
|
|
|
|
|
2021-11-01 10:14:47 +08:00
|
|
|
|
# Normalises text before it is indexed or searched.
#
# search_data - source String; it is duplicated, never modified.
# purpose     - when :topic, segmentation and whitespace squishing are skipped.
#
# Steps:
#   1. Force the copy to UTF-8.
#   2. Unless purpose is :topic: segment Chinese (CppjiebaRb) or Japanese
#      (TinyJapaneseSegmenter) text when enabled, otherwise squish whitespace.
#   3. Drop the query string from every whitespace-delimited http(s) URL token.
#
# Returns the prepared String.
def self.prepare_data(search_data, purpose = nil)
  data = search_data.dup
  data.force_encoding("UTF-8")

  if purpose != :topic
    if segment_chinese?
      # Loaded lazily so the gem is only required when Chinese segmentation is used.
      require "cppjieba_rb" unless defined?(CppjiebaRb)

      segmented_data = []

      # We need to split up the string here because Cppjieba has a bug where text starting with numeric chars will
      # be split into two segments. For example, '123abc' becomes '123' and 'abc' after segmentation.
      data.scan(/(?<chinese>[\p{Han}。,、“”《》…\.:?!;()]+)|([^\p{Han}]+)/) do
        match_data = Regexp.last_match

        if match_data[:chinese]
          segments = CppjiebaRb.segment(match_data.to_s, mode: :mix)
          segments = CppjiebaRb.filter_stop_word(segments) if ts_config != "english"
          segments = segments.filter { |s| s.present? }
          segmented_data << segments.join(" ")
        else
          segmented_data << match_data.to_s.squish
        end
      end

      data = segmented_data.join(" ")
    elsif segment_japanese?
      data.gsub!(japanese_punctuation_regexp, " ")
      data = TinyJapaneseSegmenter.segment(data)
      data = data.filter { |s| s.present? }
      data = data.join(" ")
    else
      data.squish!
    end
  end

  data.gsub!(/\S+/) do |str|
    if (url = str.match(%r{\A["]?((https?://)[\S]+)["]?\z}))
      begin
        uri = URI.parse(url[1])
        uri.query = nil
        str = uri.to_s
      rescue URI::Error
        # don't fail if uri does not parse
      end
    end

    str
  end

  data
end
|
|
|
|
|
|
2016-03-14 20:27:02 +08:00
|
|
|
|
# Converts a search word into a time in Time.zone, or nil when it is not recognised.
#
# Accepted forms:
#   "7"                  - 1-3 digits: that many days before the start of today
#   "2020", "2020-5",
#   "2020-05-17"         - year with optional month and day (missing parts default to 1);
#                          nil when month/day are out of range or parsing raises ArgumentError
#   "yesterday"          - start of yesterday
#   "monday" ...         - Time.zone.now.beginning_of_week with that day as week start
#   "june" / "jun" ...   - start of the most recent such month (the current month included)
#
# Word matching is case-insensitive.
def self.word_to_date(str)
  return Time.zone.now.beginning_of_day.days_ago(str.to_i) if str =~ /\A[0-9]{1,3}\z/

  date_parts =
    /\A(?<year>[12][0-9]{3})(?:-(?<month>[0-1]?[0-9]))?(?:-(?<day>[0-3]?[0-9]))?\z/.match(str)

  if date_parts
    year = date_parts[:year].to_i
    month = (date_parts[:month] || 1).to_i
    day = (date_parts[:day] || 1).to_i

    return if day == 0 || month == 0 || day > 31 || month > 12

    return(
      begin
        Time.zone.parse("#{year}-#{month}-#{day}")
      rescue ArgumentError
        nil
      end
    )
  end

  lowered = str.downcase
  return Time.zone.now.beginning_of_day.yesterday if lowered == "yesterday"

  titlecase = lowered.titlecase

  # to_sym is only reached for one of the seven known day names.
  return Time.zone.now.beginning_of_week(lowered.to_sym) if Date::DAYNAMES.include?(titlecase)

  idx = Date::MONTHNAMES.find_index(titlecase) || Date::ABBR_MONTHNAMES.find_index(titlecase)
  return unless idx

  delta = Time.zone.now.month - idx
  delta += 12 if delta < 0 # month has not occurred yet this year: use last year's
  Time.zone.now.beginning_of_month.months_ago(delta)
end
|
|
|
|
|
|
2016-08-11 03:40:58 +08:00
|
|
|
|
# Computes the post id at the lower edge of the "recent posts" window.
#
# Returns 0 when SiteSetting.search_prefer_recent_posts? is off. Otherwise reads up to
# two ids, newest first, starting at position search_recent_posts_size: the first is
# the oldest post inside the window, the second only exists when there are posts
# beyond it. Returns the first id when a second one exists, else 0.
def self.min_post_id_no_cache
  return 0 unless SiteSetting.search_prefer_recent_posts?

  boundary_id, older_id =
    Post
      .unscoped
      .order("id desc")
      .offset(SiteSetting.search_recent_posts_size - 1)
      .limit(2)
      .pluck(:id)

  older_id ? boundary_id : 0
end
|
|
|
|
|
|
|
|
|
|
# Cached variant of .min_post_id_no_cache.
#
# Returns 0 when SiteSetting.search_prefer_recent_posts? is off. Otherwise the value is
# fetched from Discourse.cache under a key that includes search_recent_posts_size and
# expires after one week.
#
# opts - accepted for compatibility; not used by this method.
def self.min_post_id(opts = nil)
  return 0 unless SiteSetting.search_prefer_recent_posts?

  # It can be quite slow to count all the posts so let's cache it
  cache_key = "search-min-post-id:#{SiteSetting.search_recent_posts_size}"
  Discourse.cache.fetch(cache_key, expires_in: 1.week) { min_post_id_no_cache }
end
|
|
|
|
|
|
2016-08-10 02:48:39 +08:00
|
|
|
|
# term       - prepared search term (set in #initialize, reset to "" by #execute when too short)
# clean_term - the term after zero-width characters and curly quotes were normalised
# guardian   - Guardian used for permission checks
attr_accessor :term
attr_reader :clean_term, :guardian
|
2016-08-10 02:48:39 +08:00
|
|
|
|
|
2013-05-23 02:36:14 +08:00
|
|
|
|
# Builds a search for +term+.
#
# term - raw user input; converted with to_s and copied before being cleaned.
# opts - optional Hash. Keys read here: :guardian, :search_context, :blurb_length,
#        :page, :type_filter, :search_type. NOTE: the Hash is stored as-is and
#        :type_filter may be overwritten, so the caller's Hash is modified.
#
# Raises Discourse::InvalidAccess when a private-message search is requested and the
# guardian cannot see the private messages of the search context.
def initialize(term, opts = nil)
  @opts = opts || {}
  @guardian = @opts[:guardian] || Guardian.new
  @search_context = @opts[:search_context]
  @blurb_length = @opts[:blurb_length]
  @valid = true
  @page = @opts[:page]
  @search_all_pms = false

  term = term.to_s.dup

  # Removes any zero-width characters from search terms
  term.gsub!(/[\u200B-\u200D\uFEFF]/, "")
  # Replace curly quotes to regular quotes
  term.gsub!(/[\u201c\u201d]/, '"')

  @clean_term = term
  @in_title = false

  # Strips recognised advanced filters out of the term and records them.
  term = process_advanced_search!(term)

  if term.present?
    @term = Search.prepare_data(term, Topic === @search_context ? :topic : nil)
    @original_term = Search.escape_string(@term)
  end

  if @search_pms || @search_all_pms || @opts[:type_filter] == "private_messages"
    @opts[:type_filter] = "private_messages"
    @search_context ||= @guardian.user

    unless @search_context.present? && @guardian.can_see_private_messages?(@search_context.id)
      raise Discourse::InvalidAccess.new
    end
  end

  @opts[:type_filter] = "all_topics" if @search_all_topics && @guardian.user

  @results =
    GroupedSearchResults.new(
      type_filter: @opts[:type_filter],
      term: clean_term,
      blurb_term: term,
      search_context: @search_context,
      blurb_length: @blurb_length,
      is_header_search: !use_full_page_limit,
    )
end
|
|
|
|
|
|
2017-07-21 00:12:34 +08:00
|
|
|
|
# Number of rows to request: one more than the page size (Search.per_filter for
# full-page searches, Search.per_facet otherwise). The extra row presumably lets the
# caller detect that more results exist - the consumer is outside this chunk.
def limit
  (use_full_page_limit ? Search.per_filter : Search.per_facet) + 1
end
|
|
|
|
|
|
|
|
|
|
# Row offset for the current page: (page - 1) * Search.per_filter when both a page and
# a type filter are present, otherwise 0.
def offset
  return 0 unless @page && @opts[:type_filter].present?

  (@page - 1) * Search.per_filter
end
|
|
|
|
|
|
2016-08-10 02:48:39 +08:00
|
|
|
|
# False once #execute has rejected the term for being too short; true otherwise.
def valid?
  @valid
end
|
|
|
|
|
|
2022-05-24 23:31:24 +08:00
|
|
|
|
# True for full-page searches (opts[:search_type] == :full_page) and for searches
# whose context is a Topic.
def use_full_page_limit
  @opts[:search_type] == :full_page || Topic === @search_context
end
|
|
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
|
# Convenience wrapper: builds a Search for (term, opts) and runs it.
# Returns whatever #execute returns.
def self.execute(term, opts = nil)
  new(term, opts).execute
end
|
|
|
|
|
|
|
|
|
|
# Query a term
#
# readonly_mode - forwarded to log_query? to decide whether the search is logged.
#
# Flow:
#   1. Optionally log the query and remember the search log id on the results.
#   2. Without filters and without :search_for_id, require at least one word of
#      min_search_term_length; otherwise mark the search invalid and return nil.
#   3. With :search_for_id on a topic-like type filter, try to resolve the term as a
#      topic id or topic URL.
#   4. Run the grouped search if no posts were found yet, preload custom fields and
#      run the Search.on_preload callbacks.
#
# Returns the GroupedSearchResults, or nil when the term is too short.
def execute(readonly_mode: Discourse.readonly_mode?)
  if log_query?(readonly_mode)
    status, search_log_id =
      SearchLog.log(
        term: @clean_term,
        search_type: @opts[:search_type],
        ip_address: @opts[:ip_address],
        user_id: @opts[:user_id],
      )

    @results.search_log_id = search_log_id unless status == :error
  end

  unless @filters.present? || @opts[:search_for_id]
    min_length = min_search_term_length
    # Split on whitespace that is not inside a double-quoted phrase.
    terms = (@term || "").split(/\s(?=(?:[^"]|"[^"]*")*$)/).reject { |t| t.length < min_length }

    if terms.blank?
      @term = ""
      @valid = false
      return
    end
  end

  # If the term is a number or url to a topic, just include that topic
  if @opts[:search_for_id] && %w[topic private_messages all_topics].include?(@results.type_filter)
    if @term =~ /\A\d+\z/
      single_topic(@term.to_i)
    elsif (route = Discourse.route_for(@term))
      if route[:controller] == "topics" && route[:action] == "show"
        topic_id = (route[:id] || route[:topic_id]).to_i
        single_topic(topic_id) if topic_id > 0
      end
    end
  end

  find_grouped_results if @results.posts.blank?

  if preloaded_topic_custom_fields.present? && @results.posts.present?
    topics = @results.posts.map(&:topic)
    Topic.preload_custom_fields(topics, preloaded_topic_custom_fields)
  end

  Search.preload(@results, self)

  @results
end
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
2020-08-07 10:47:00 +08:00
|
|
|
|
# Registers +block+ as the ordering handler for +trigger+, replacing any handler
# previously stored under the same trigger. Returns the block.
def self.advanced_order(trigger, &block)
  (@advanced_orders ||= {})[trigger] = block
end
|
|
|
|
|
|
|
|
|
|
# Hash of trigger => block registered via .advanced_order, or nil when none was registered.
def self.advanced_orders
  @advanced_orders
end
|
|
|
|
|
|
2015-06-23 10:14:06 +08:00
|
|
|
|
# Registers +block+ as the handler for search words matching +trigger+ (a Regexp).
# A handler receives the posts relation and the first capture group of the match.
# The handlers in this file read @guardian, so they are presumably evaluated in the
# context of a Search instance - the call site is outside this chunk.
# Returns the block.
def self.advanced_filter(trigger, &block)
  (@advanced_filters ||= {})[trigger] = block
end
|
|
|
|
|
|
|
|
|
|
# Hash of Regexp => block registered via .advanced_filter, or nil when none was registered.
def self.advanced_filters
  @advanced_filters
end
|
|
|
|
|
|
2020-09-14 09:58:28 +08:00
|
|
|
|
# Registers extra topic eager-loads: either +tables+ or, when tables is nil, the given block.
# Returns the Array of everything registered so far.
def self.custom_topic_eager_load(tables = nil, &block)
  (@custom_topic_eager_loads ||= []) << (tables || block)
end
|
|
|
|
|
|
|
|
|
|
# Everything registered via .custom_topic_eager_load; an empty Array when nothing was registered.
def self.custom_topic_eager_loads
  Array(@custom_topic_eager_loads)
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:personal-direct - for a logged-in user, keeps posts whose topic has no
# topic_allowed_groups row and in which the user is joined to exactly one other
# topic_allowed_users row. Yields nil for anonymous users.
advanced_filter(/\Ain:personal-direct\z/i) do |posts|
  if @guardian.user
    posts.joins("LEFT JOIN topic_allowed_groups tg ON posts.topic_id = tg.topic_id").where(
      <<~SQL,
        tg.id IS NULL
        AND posts.topic_id IN (
          SELECT tau.topic_id
          FROM topic_allowed_users tau
          JOIN topic_allowed_users tau2
          ON tau2.topic_id = tau.topic_id
          AND tau2.id != tau.id
          WHERE tau.user_id = :user_id
          GROUP BY tau.topic_id
          HAVING COUNT(*) = 1
        )
      SQL
      user_id: @guardian.user.id,
    )
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:all-pms - admins only: narrows to posts.private_posts; yields nil for everyone else.
advanced_filter(/\Ain:all-pms\z/i) { |posts| posts.private_posts if @guardian.is_admin? }

# in:tagged - posts whose topic has at least one topic_tags row.
advanced_filter(/\Ain:tagged\z/i) do |posts|
  posts.where("EXISTS (SELECT 1 FROM topic_tags WHERE topic_tags.topic_id = posts.topic_id)")
end

# in:untagged - posts whose topic has no topic_tags row (anti-join).
advanced_filter(/\Ain:untagged\z/i) do |posts|
  posts.joins(
    "LEFT JOIN topic_tags ON
      topic_tags.topic_id = posts.topic_id",
  ).where("topic_tags.id IS NULL")
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# status:open - topic is neither closed nor archived.
advanced_filter(/\Astatus:open\z/i) do |posts|
  posts.where("NOT topics.closed AND NOT topics.archived")
end

# status:closed - topic is closed.
advanced_filter(/\Astatus:closed\z/i) { |posts| posts.where("topics.closed") }

# status:public - topic is in a category that is not read-restricted.
advanced_filter(/\Astatus:public\z/i) do |posts|
  category_ids = Category.where(read_restricted: false).pluck(:id)

  posts.where("topics.category_id in (?)", category_ids)
end

# status:archived - topic is archived.
advanced_filter(/\Astatus:archived\z/i) { |posts| posts.where("topics.archived") }

# status:noreplies - topic has exactly one post.
advanced_filter(/\Astatus:noreplies\z/i) { |posts| posts.where("topics.posts_count = 1") }

# status:single_user - topic has exactly one participant.
advanced_filter(/\Astatus:single_user\z/i) { |posts| posts.where("topics.participant_count = 1") }
|
2015-06-23 10:14:06 +08:00
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# posts_count:N - topic has exactly N posts.
advanced_filter(/\Aposts_count:(\d+)\z/i) do |posts, match|
  posts.where("topics.posts_count = ?", match.to_i)
end

# min_post_count:N - topic has at least N posts (same condition as min_posts:N).
advanced_filter(/\Amin_post_count:(\d+)\z/i) do |posts, match|
  posts.where("topics.posts_count >= ?", match.to_i)
end

# min_posts:N - topic has at least N posts.
advanced_filter(/\Amin_posts:(\d+)\z/i) do |posts, match|
  posts.where("topics.posts_count >= ?", match.to_i)
end

# max_posts:N - topic has at most N posts.
advanced_filter(/\Amax_posts:(\d+)\z/i) do |posts, match|
  posts.where("topics.posts_count <= ?", match.to_i)
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:first (alias: f) - only the first post of each topic.
# The alternation is grouped so both forms must match the whole word; ungrouped,
# `\Ain:first|^f\z` also accepted any word that merely starts with "in:first".
advanced_filter(/\A(?:in:first|f)\z/i) { |posts| posts.where("posts.post_number = 1") }
|
2015-06-23 10:14:06 +08:00
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:pinned - topic has a pinned_at timestamp.
advanced_filter(/\Ain:pinned\z/i) { |posts| posts.where("topics.pinned_at IS NOT NULL") }

# in:wiki - post is flagged as wiki.
advanced_filter(/\Ain:wiki\z/i) { |posts, _match| posts.where(wiki: true) }
|
2016-05-13 16:26:53 +08:00
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# badge:NAME_OR_ID - posts written by users who hold the badge.
# The badge is looked up by name (ILIKE, so SQL wildcards in the input apply) or by
# numeric id; an unknown badge produces an empty result.
advanced_filter(/\Abadge:(.*)\z/i) do |posts, match|
  badge_id = Badge.where("name ilike ? OR id = ?", match, match.to_i).pluck_first(:id)

  if badge_id
    posts.where(
      "posts.user_id IN (SELECT ub.user_id FROM user_badges ub WHERE ub.badge_id = ?)",
      badge_id,
    )
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
|
|
2020-04-16 09:32:21 +08:00
|
|
|
|
# Narrows +posts+ to those on which the current user has a non-deleted post action
# of the given type.
#
# posts            - relation responding to #where
# post_action_type - post_action_type_id to match
#
# Requires @guardian.user to be present (callers check this first).
def post_action_type_filter(posts, post_action_type)
  posts.where(
    "posts.id IN (
      SELECT pa.post_id FROM post_actions pa
      WHERE pa.user_id = ? AND
            pa.post_action_type_id = ? AND
            deleted_at IS NULL
    )",
    @guardian.user.id,
    post_action_type,
  )
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:likes - posts the current user has liked; yields nil for anonymous users.
advanced_filter(/\Ain:(likes)\z/i) do |posts, _match|
  post_action_type_filter(posts, PostActionType.types[:like]) if @guardian.user
end

# NOTE: With polymorphic bookmarks it may make sense to possibly expand
# this at some point, as it only acts on posts at the moment. On the other
# hand, this may not be necessary, as the user bookmark list has advanced
# search based on a RegisteredBookmarkable's #search_query method.
#
# in:bookmarks - posts the current user has bookmarked; yields nil for anonymous users.
advanced_filter(/\Ain:(bookmarks)\z/i) do |posts, _match|
  posts.where(<<~SQL, @guardian.user.id) if @guardian.user
    posts.id IN (
      SELECT bookmarkable_id FROM bookmarks
      WHERE bookmarks.user_id = ? AND bookmarks.bookmarkable_type = 'Post'
    )
  SQL
end

# in:posted - posts written by the current user; yields nil for anonymous users.
advanced_filter(/\Ain:posted\z/i) do |posts|
  posts.where("posts.user_id = ?", @guardian.user.id) if @guardian.user
end

# in:created / in:mine - first posts written by the current user; yields nil for anonymous users.
advanced_filter(/\Ain:(created|mine)\z/i) do |posts|
  posts.where(user_id: @guardian.user.id, post_number: 1) if @guardian.user
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# created:@username - first posts written by the named user.
# The lookup uses username_lower, like the user: and @username filters, because the
# input is downcased before comparison. An unknown username produces an empty result
# rather than matching posts whose user_id is NULL.
advanced_filter(/\Acreated:@(.*)\z/i) do |posts, match|
  user_id = User.where(username_lower: match.downcase).pluck_first(:id)

  if user_id
    posts.where(user_id: user_id, post_number: 1)
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:watching / in:tracking - posts in topics where the current user's notification
# level is at least the named level. Yields nil for anonymous users.
advanced_filter(/\Ain:(watching|tracking)\z/i) do |posts, match|
  if @guardian.user
    # to_sym is safe here: the regexp only admits "watching" or "tracking".
    level = TopicUser.notification_levels[match.downcase.to_sym]

    posts.where(
      "posts.topic_id IN (
        SELECT tu.topic_id FROM topic_users tu
        WHERE tu.user_id = :user_id AND
              tu.notification_level >= :level
      )",
      user_id: @guardian.user.id,
      level: level,
    )
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# in:seen - posts for which the current user has a post_timings row.
# Yields nil for anonymous users. The user id is quoted through the connection
# because it is interpolated into the JOIN clause.
advanced_filter(/\Ain:seen\z/i) do |posts|
  if @guardian.user
    posts.joins(
      "INNER JOIN post_timings ON
        post_timings.topic_id = posts.topic_id
        AND post_timings.post_number = posts.post_number
        AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
      ",
    )
  end
end

# in:unseen - posts for which the current user has no post_timings row (anti-join).
# Yields nil for anonymous users.
advanced_filter(/\Ain:unseen\z/i) do |posts|
  if @guardian.user
    posts.joins(
      "LEFT JOIN post_timings ON
        post_timings.topic_id = posts.topic_id
        AND post_timings.post_number = posts.post_number
        AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
      ",
    ).where("post_timings.user_id IS NULL")
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# with:images - posts that have an image_upload_id.
advanced_filter(/\Awith:images\z/i) { |posts| posts.where("posts.image_upload_id IS NOT NULL") }

# category:VALUE - posts in categories whose slug or name matches VALUE (ILIKE) or
# whose id equals VALUE. Subcategories of the first match are included unless VALUE
# is prefixed with "=". No matching category produces an empty result.
advanced_filter(/\Acategory:(.+)\z/i) do |posts, match|
  exact = match[0] == "="
  match = match[1..-1] if exact

  category_ids =
    Category.where("slug ilike ? OR name ilike ? OR id = ?", match, match, match.to_i).pluck(:id)

  if category_ids.present?
    category_ids += Category.subcategory_ids(category_ids.first) unless exact
    @category_filter_matched ||= true
    posts.where("topics.category_id IN (?)", category_ids)
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# #slug, #parent:child, #=slug - resolves the word, in this order, as
#   1. a category slug (optionally "parent:child"; a leading "=" excludes subcategories),
#   2. a tag name,
#   3. a tag group slug,
#   4. otherwise a plain full-text match on "#word" when the word is long enough.
# Yields nil when none of these applies.
advanced_filter(/\A\#([\p{L}\p{M}0-9\-:=]+)\z/i) do |posts, match|
  category_slug, subcategory_slug = match.to_s.split(":")
  next unless category_slug

  exact = category_slug[0] == "="
  category_slug = category_slug[1..-1] if exact

  category_id =
    if subcategory_slug
      Category
        .where("lower(slug) = ?", subcategory_slug.downcase)
        .where(
          parent_category_id:
            Category.where("lower(slug) = ?", category_slug.downcase).select(:id),
        )
        .pluck_first(:id)
    else
      # Top-level categories sort before subcategories sharing the slug.
      Category
        .where("lower(slug) = ?", category_slug.downcase)
        .order("case when parent_category_id is null then 0 else 1 end")
        .pluck_first(:id)
    end

  if category_id
    category_ids = [category_id]
    category_ids += Category.subcategory_ids(category_id) if !exact

    @category_filter_matched ||= true
    posts.where("topics.category_id IN (?)", category_ids)
  elsif (tag_id = Tag.where_name(category_slug).pluck_first(:id))
    # try a possible tag match
    posts.where(<<~SQL, tag_id)
      topics.id IN (
        SELECT DISTINCT(tt.topic_id)
        FROM topic_tags tt
        WHERE tt.tag_id = ?
      )
    SQL
  elsif (tag_group_id = TagGroup.find_id_by_slug(category_slug))
    posts.where(<<~SQL, tag_group_id)
      topics.id IN (
        SELECT DISTINCT(tt.topic_id)
        FROM topic_tags tt
        WHERE tt.tag_id in (
          SELECT tag_id
          FROM tag_group_memberships
          WHERE tag_group_id = ?
        )
      )
    SQL
  elsif match.to_s.length >= min_search_term_length
    # a bit yucky but we got to add the term back in
    posts.where <<~SQL
      posts.id IN (
        SELECT post_id FROM post_search_data pd1
        WHERE pd1.search_data @@ #{Search.ts_query(term: "##{match}")})
    SQL
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# group:NAME_OR_ID - posts written by members of a group that the current user may
# see (group and member list). The lookup can be adjusted by the callbacks in
# DiscoursePluginRegistry.search_groups_set_query_callbacks. An unknown group
# produces an empty result.
advanced_filter(/\Agroup:(.+)\z/i) do |posts, match|
  group_query =
    Group
      .visible_groups(@guardian.user)
      .members_visible_groups(@guardian.user)
      .where("groups.name ILIKE ? OR (id = ? AND id > 0)", match, match.to_i)

  DiscoursePluginRegistry.search_groups_set_query_callbacks.each do |cb|
    group_query = cb.call(group_query, @term, @guardian)
  end

  group_id = group_query.pluck_first(:id)

  if group_id
    posts.where(
      "posts.user_id IN (select gu.user_id from group_users gu where gu.group_id = ?)",
      group_id,
    )
  else
    posts.where("1 = 0")
  end
end

# group_messages:NAME_OR_ID - posts in topics that allow the named group, restricted
# to visible groups that have messages. An unknown group produces an empty result.
advanced_filter(/\Agroup_messages:(.+)\z/i) do |posts, match|
  group_id =
    Group
      .visible_groups(@guardian.user)
      .members_visible_groups(@guardian.user)
      .where(has_messages: true)
      .where("name ilike ? OR (id = ? AND id > 0)", match, match.to_i)
      .pluck_first(:id)

  if group_id
    posts.where(
      "posts.topic_id IN (SELECT topic_id FROM topic_allowed_groups WHERE group_id = ?)",
      group_id,
    )
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# user:NAME_OR_ID - posts by the non-staged user with that lower-cased username or id.
# An unknown user produces an empty result.
advanced_filter(/\Auser:(.+)\z/i) do |posts, match|
  user_id =
    User
      .where(staged: false)
      .where("username_lower = ? OR id = ?", match.downcase, match.to_i)
      .pluck_first(:id)

  if user_id
    posts.where("posts.user_id = ?", user_id)
  else
    posts.where("1 = 0")
  end
end

# @username - posts by the non-staged user with that normalized username.
# "@me" falls back to the current user when no user is literally named "me".
# An unknown user produces an empty result.
advanced_filter(/\A\@(\S+)\z/i) do |posts, match|
  username = User.normalize_username(match)

  user_id = User.not_staged.where(username_lower: username).pluck_first(:id)

  user_id = @guardian.user&.id if !user_id && username == "me"

  if user_id
    posts.where("posts.user_id = ?", user_id)
  else
    posts.where("1 = 0")
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# before:DATE - posts created before the date understood by Search.word_to_date.
# An unrecognised date leaves the relation unchanged.
advanced_filter(/\Abefore:(.*)\z/i) do |posts, match|
  if date = Search.word_to_date(match)
    posts.where("posts.created_at < ?", date)
  else
    posts
  end
end

# after:DATE - posts created after the date understood by Search.word_to_date.
# An unrecognised date leaves the relation unchanged.
advanced_filter(/\Aafter:(.*)\z/i) do |posts, match|
  if date = Search.word_to_date(match)
    posts.where("posts.created_at > ?", date)
  else
    posts
  end
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# tag:a,b / tags:a+b - topics carrying the tags (see #search_tags for "," vs "+").
advanced_filter(/\Atags?:([\p{L}\p{M}0-9,\-_+]+)\z/i) do |posts, match|
  search_tags(posts, match, positive: true)
end

# -tag:a,b / -tags:a+b - topics NOT matched by the corresponding tags: filter.
advanced_filter(/\A\-tags?:([\p{L}\p{M}0-9,\-_+]+)\z/i) do |posts, match|
  search_tags(posts, match, positive: false)
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# filetype:png,jpg / filetypes:... - posts that either link to a file with one of the
# extensions (topic_links) or reference an upload with one of them. Extensions are
# compared in lower case.
advanced_filter(/\Afiletypes?:([a-zA-Z0-9,\-_]+)\z/i) do |posts, match|
  file_extensions = match.split(",").map(&:downcase)

  posts.where(
    "posts.id IN (
      SELECT post_id
        FROM topic_links
       WHERE extension IN (:file_extensions)

      UNION

      SELECT upload_references.target_id
        FROM uploads
        JOIN upload_references ON upload_references.target_type = 'Post' AND upload_references.upload_id = uploads.id
       WHERE lower(uploads.extension) IN (:file_extensions)
    )",
    file_extensions: file_extensions,
  )
end
|
|
|
|
|
|
2023-01-21 02:52:49 +08:00
|
|
|
|
# min_views:N - topic has at least N views.
advanced_filter(/\Amin_views:(\d+)\z/i) do |posts, match|
  posts.where("topics.views >= ?", match.to_i)
end

# max_views:N - topic has at most N views.
advanced_filter(/\Amax_views:(\d+)\z/i) do |posts, match|
  posts.where("topics.views <= ?", match.to_i)
end
|
|
|
|
|
|
2013-05-23 02:36:14 +08:00
|
|
|
|
private
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
2018-01-04 05:45:21 +08:00
|
|
|
|
# Restricts (or, when +positive+ is false, excludes) posts by topic tags.
#
# posts    - the scope to filter; must respond to `where`.
# match    - the raw tag expression captured from the search term. Tags joined
#            with "+" must ALL be present on the topic; tags joined with ","
#            match when ANY of them is present. May be nil.
# positive - true to keep topics that match, false to keep topics that do not.
#
# Returns the filtered scope, or nil when +match+ is nil.
def search_tags(posts, match, positive:)
  return if match.nil?

  # Work on a lower-cased copy: the previous in-place `downcase!` mutated the
  # caller's string and raised FrozenError when handed a frozen string.
  normalized = match.downcase
  modifier = positive ? "" : "NOT"

  if normalized.include?("+")
    tags = normalized.split("+")

    # All tags of a topic are aggregated into one tsvector and matched against
    # an AND-ed tsquery, so every requested tag has to be present.
    posts.where(
      "topics.id #{modifier} IN (
        SELECT tt.topic_id
        FROM topic_tags tt, tags
        WHERE tt.tag_id = tags.id
        GROUP BY tt.topic_id
        HAVING to_tsvector(#{default_ts_config}, #{Search.wrap_unaccent("array_to_string(array_agg(lower(tags.name)), ' ')")}) @@ to_tsquery(#{default_ts_config}, #{Search.wrap_unaccent("?")})
      )",
      tags.join("&"),
    )
  else
    tags = normalized.split(",")

    posts.where(
      "topics.id #{modifier} IN (
        SELECT DISTINCT(tt.topic_id)
        FROM topic_tags tt, tags
        WHERE tt.tag_id = tags.id AND lower(tags.name) IN (?)
      )",
      tags,
    )
  end
end
|
|
|
|
|
|
|
|
|
|
# Splits +term+ into words (double-quoted phrases stay attached to their word),
# consumes every word that is an advanced-search directive and returns the
# remaining words joined by single spaces.
#
# Side effects, depending on the directives found:
#   * every registered advanced filter whose matcher matches the word (with
#     quotes stripped) is appended to @filters as [block, first_capture];
#   * @order, @in_title, @search_context, @search_all_topics, @search_pms and
#     @search_all_pms are set by the built-in directives handled below.
#
# term - anything responding to to_s (nil yields an empty result).
#
# Returns the String left over after all directives have been removed.
def process_advanced_search!(term)
  term
    .to_s
    .scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/)
    .map do |(word, _)|
      next if word.blank?

      found = false
      # Quote-stripped form of the word; computed once instead of once per
      # registered filter.
      cleaned = word.gsub(/["']/, "")

      Search.advanced_filters.each do |matcher, block|
        if cleaned =~ matcher
          (@filters ||= []) << [block, $1]
          found = true
        end
      end

      # Built-in directives. Every recognised directive yields nil so that the
      # word is dropped from the remaining term.
      case word
      when "l"
        @order = :latest
        nil
      when /\Aorder:\w+\z/i
        @order = word.downcase.gsub("order:", "").to_sym
        nil
      when /\Ain:title\z/i, "t"
        @in_title = true
        nil
      when /\Atopic:(\d+)\z/i
        topic_id = $1.to_i
        if topic_id > 1
          topic = Topic.find_by(id: topic_id)
          @search_context = topic if @guardian.can_see?(topic)
        end
        nil
      when /\Ain:all\z/i
        @search_all_topics = true
        nil
      when /\Ain:personal\z/i, /\Ain:messages\z/i, /\Ain:personal-direct\z/i, /\Agroup_messages:(.+)\z/i
        @search_pms = true
        nil
      when /\Ain:all-pms\z/i
        @search_all_pms = true
        nil
      when /\Apersonal_messages:(.+)\z/i
        if user = User.find_by_username($1)
          @search_pms = true
          @search_context = user
        end
        nil
      else
        # Words consumed by a registered advanced filter are dropped as well.
        found ? nil : word
      end
    end
    .compact
    .join(" ")
end
|
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
|
# Runs the searches that populate @results.
#
# With a type filter set, only the matching `<type_filter>_search` method is
# run; a filter that is not listed in Search.facets raises
# Discourse::InvalidAccess. Without a type filter, the user/category/tag/group
# searches run first (only when there is a term and no search context),
# followed by the topic search.
#
# Returns @results, or nil when the database rejects the statement.
def find_grouped_results
  type_filter = @results.type_filter

  if type_filter.blank?
    include_side_searches = @term.present? && !@search_context

    if include_side_searches
      user_search
      category_search
      tags_search
      groups_search
    end

    topic_search
  else
    raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(type_filter)

    # `send` rather than `public_send`: the per-type search methods are not public
    send("#{type_filter}_search")
  end

  @results
rescue ActiveRecord::StatementInvalid
  # In the event of a PG:Error return nothing, it is likely they used a foreign language whose
  # locale is not supported by postgres
  nil
end
|
2018-06-07 13:28:18 +08:00
|
|
|
|
|
2013-05-23 02:36:14 +08:00
|
|
|
|
# Search restricted to a single topic: looks up the first post of topic +id+
# (optionally constrained to the archetype requested through
# @opts[:restrict_to_archetype]) and adds it to @results.
#
# Returns @results, or nil when the guardian may not see the post.
def single_topic(id)
  restricted_archetype = @opts[:restrict_to_archetype]

  post =
    if restricted_archetype.present?
      # Anything other than the default archetype is treated as a private message.
      archetype = restricted_archetype == Archetype.default ? Archetype.default : Archetype.private_message

      posts_scope.joins(:topic).find_by(
        "topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1",
        id: id,
        archetype: archetype,
      )
    else
      posts_scope.find_by(topic_id: id, post_number: 1)
    end

  return nil unless @guardian.can_see?(post)

  @results.add(post)
  @results
end
|
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
|
# Memoized reader for the guardian's secure category ids. The guardian is only
# asked again while the cached value is nil.
def secure_category_ids
  @secure_category_ids = @guardian.secure_category_ids if @secure_category_ids.nil?
  @secure_category_ids
end
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
|
# Adds the categories whose search data matches the current term to @results,
# ordered by monthly topic count and restricted to what the guardian may see.
def category_search
  # scope is leaking onto Category, this is not good and probably a bug in Rails
  # the secure_category_ids will invoke the same method on User, it calls Category.where
  # however the scope from the query below is leaking in to Category, this works around
  # the issue while we figure out what is up in Rails
  secure_category_ids

  matching =
    Category
      .includes(:category_search_data)
      .where("category_search_data.search_data @@ #{ts_query}")
      .references(:category_search_data)

  visible = matching.order("topics_month DESC").secured(@guardian).limit(limit)

  visible.each do |category|
    @results.add(category)
  end
end
|
2013-05-23 02:36:14 +08:00
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
|
# Adds active, non-staged users whose search data matches the term to
# @results. An exact username match sorts first, then the most recent posters.
# Each user also gets `custom_data`: the searchable user fields whose value
# contains the original term.
#
# Does nothing when user profiles are hidden from the public and there is no
# current user.
def user_search
  return if SiteSetting.hide_user_profiles_from_public && !@guardian.user

  # NOTE(review): @original_term is interpolated straight into the ORDER BY
  # below; presumably it is escaped where it is assigned - confirm.
  users =
    User
      .includes(:user_search_data)
      .references(:user_search_data)
      .where(active: true)
      .where(staged: false)
      .where("user_search_data.search_data @@ #{ts_query("simple")}")
      .order("CASE WHEN username_lower = '#{@original_term.downcase}' THEN 0 ELSE 1 END")
      .order("last_posted_at DESC")
      .limit(limit)

  # Suspended users are only listed when the site setting allows it or the
  # current user is an admin.
  hide_suspended = !SiteSetting.enable_listing_suspended_users_on_search && !@guardian.user&.admin
  users = users.where(suspended_at: nil) if hide_suspended

  custom_field_rows =
    DB.query(<<~SQL, user_ids: users.pluck(:id), term: "%#{@original_term.downcase}%")
      SELECT user_custom_fields.user_id, user_fields.name, user_custom_fields.value FROM user_custom_fields
      INNER JOIN user_fields ON user_fields.id = REPLACE(user_custom_fields.name, 'user_field_', '')::INTEGER AND user_fields.searchable IS TRUE
      WHERE user_id IN (:user_ids)
      AND user_custom_fields.name LIKE 'user_field_%'
      AND user_custom_fields.value ILIKE :term
    SQL

  # user_id => [{ name:, value: }, ...]
  users_custom_data =
    custom_field_rows.each_with_object({}) do |row, by_user|
      (by_user[row.user_id] ||= []) << { name: row.name, value: row.value }
    end

  users.each do |user|
    user.custom_data = users_custom_data[user.id] || []
    @results.add(user)
  end
end
|
2013-05-23 02:36:14 +08:00
|
|
|
|
|
2019-03-04 17:30:09 +08:00
|
|
|
|
# Adds groups visible to the current user whose name or full name contains
# the term to @results. Registered plugin callbacks may replace the query
# before it is limited and executed.
def groups_search
  visible_groups = Group.visible_groups(@guardian.user, "groups.name ASC", include_everyone: false)

  group_query =
    visible_groups.where("groups.name ILIKE :term OR groups.full_name ILIKE :term", term: "%#{@term}%")

  # Each callback receives the current query and returns the query to use next.
  DiscoursePluginRegistry.search_groups_set_query_callbacks.each do |callback|
    group_query = callback.call(group_query, @term, @guardian)
  end

  group_query.limit(limit).each do |group|
    @results.add(group)
  end
end
|
|
|
|
|
|
2017-08-25 23:52:18 +08:00
|
|
|
|
# Adds tags whose search data matches the term to @results, skipping tags
# that are hidden from the current guardian. Does nothing when tagging is
# disabled.
def tags_search
  return unless SiteSetting.tagging_enabled

  tags =
    Tag
      .includes(:tag_search_data)
      .where("tag_search_data.search_data @@ #{ts_query}")
      .references(:tag_search_data)
      .order("name asc")
      .limit(limit)

  hidden_tag_names = DiscourseTagging.hidden_tag_names(@guardian)

  tags.each do |tag|
    next if hidden_tag_names.include?(tag.name)

    @results.add(tag)
  end
end
|
2017-08-25 23:52:18 +08:00
|
|
|
|
|
2021-10-06 23:42:52 +08:00
|
|
|
|
# Search for the "exclude_topics" facet: runs the user, category, tag and
# group searches (in that order) but no topic search. Does nothing when there
# is no term.
def exclude_topics_search
  return unless @term.present?

  user_search
  category_search
  tags_search
  groups_search
end
|
|
|
|
|
|
2019-03-26 17:01:19 +08:00
|
|
|
|
# Regexp source matching a double-quoted phrase; the single capture group is
# the text between the quotes. posts_query compiles it with Regexp.new to
# pull the exact-match phrases out of the search term.
PHRASE_MATCH_REGEXP_PATTERN = '"([^"]+)"'
|
|
|
|
|
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
def posts_query(limit, type_filter: nil, aggregate_search: false)
|
2015-09-25 23:43:04 +08:00
|
|
|
|
posts =
|
|
|
|
|
Post.where(post_type: Topic.visible_post_types(@guardian.user)).joins(
|
2014-12-04 10:46:52 +08:00
|
|
|
|
:post_search_data,
|
|
|
|
|
:topic,
|
|
|
|
|
)
|
2021-10-26 15:16:38 +08:00
|
|
|
|
|
|
|
|
|
if type_filter != "private_messages"
|
|
|
|
|
posts = posts.joins("LEFT JOIN categories ON categories.id = topics.category_id")
|
|
|
|
|
end
|
2015-02-19 09:56:49 +08:00
|
|
|
|
|
2018-05-07 17:43:55 +08:00
|
|
|
|
is_topic_search = @search_context.present? && @search_context.is_a?(Topic)
|
2019-05-01 23:36:20 +08:00
|
|
|
|
posts = posts.where("topics.visible") unless is_topic_search
|
2014-12-04 10:46:52 +08:00
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
if type_filter == "private_messages" || (is_topic_search && @search_context.private_message?)
|
2020-08-18 14:51:17 +08:00
|
|
|
|
posts =
|
|
|
|
|
posts.where(
|
|
|
|
|
"topics.archetype = ? AND post_search_data.private_message",
|
|
|
|
|
Archetype.private_message,
|
|
|
|
|
)
|
2013-08-02 08:31:36 +08:00
|
|
|
|
|
2020-08-18 14:51:17 +08:00
|
|
|
|
posts = posts.private_posts_for_user(@guardian.user) unless @guardian.is_admin?
|
2021-10-26 15:16:38 +08:00
|
|
|
|
elsif type_filter == "all_topics"
|
2020-08-18 14:51:17 +08:00
|
|
|
|
private_posts =
|
|
|
|
|
posts.where(
|
|
|
|
|
"topics.archetype = ? AND post_search_data.private_message",
|
|
|
|
|
Archetype.private_message,
|
|
|
|
|
).private_posts_for_user(@guardian.user)
|
2020-01-28 18:11:33 +08:00
|
|
|
|
|
2020-08-18 14:51:17 +08:00
|
|
|
|
posts =
|
|
|
|
|
posts.where(
|
|
|
|
|
"topics.archetype <> ? AND NOT post_search_data.private_message",
|
|
|
|
|
Archetype.private_message,
|
|
|
|
|
).or(private_posts)
|
2015-06-23 10:14:06 +08:00
|
|
|
|
else
|
2020-08-18 14:51:17 +08:00
|
|
|
|
posts =
|
|
|
|
|
posts.where(
|
|
|
|
|
"topics.archetype <> ? AND NOT post_search_data.private_message",
|
|
|
|
|
Archetype.private_message,
|
|
|
|
|
)
|
2018-06-07 13:28:18 +08:00
|
|
|
|
end
|
2016-07-25 13:06:25 +08:00
|
|
|
|
|
|
|
|
|
if @term.present?
|
2015-02-19 09:56:49 +08:00
|
|
|
|
if is_topic_search
|
2016-07-25 13:06:25 +08:00
|
|
|
|
term_without_quote = @term
|
|
|
|
|
term_without_quote = $1 if @term =~ /"(.+)"/
|
|
|
|
|
|
|
|
|
|
term_without_quote = $1 if @term =~ /'(.+)'/
|
2014-10-18 12:19:08 +08:00
|
|
|
|
|
2015-06-23 10:14:06 +08:00
|
|
|
|
posts = posts.joins("JOIN users u ON u.id = posts.user_id")
|
|
|
|
|
posts =
|
|
|
|
|
posts.where(
|
|
|
|
|
"posts.raw || ' ' || u.username || ' ' || COALESCE(u.name, '') ilike ?",
|
|
|
|
|
"%#{term_without_quote}%",
|
|
|
|
|
)
|
2018-06-07 13:28:18 +08:00
|
|
|
|
else
|
2018-02-20 11:41:00 +08:00
|
|
|
|
# A is for title
|
2015-06-23 10:14:06 +08:00
|
|
|
|
# B is for category
|
2018-02-20 11:41:00 +08:00
|
|
|
|
# C is for tags
|
|
|
|
|
# D is for cooked
|
2015-06-23 10:14:06 +08:00
|
|
|
|
weights = @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
|
2020-07-16 12:21:19 +08:00
|
|
|
|
posts = posts.where(post_number: 1) if @in_title
|
2015-06-23 10:14:06 +08:00
|
|
|
|
posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
|
2019-03-26 17:01:19 +08:00
|
|
|
|
exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten
|
2019-03-18 15:25:45 +08:00
|
|
|
|
|
2015-06-23 10:14:06 +08:00
|
|
|
|
exact_terms.each do |exact|
|
2018-05-08 13:59:03 +08:00
|
|
|
|
posts =
|
|
|
|
|
posts.where("posts.raw ilike :exact OR topics.title ilike :exact", exact: "%#{exact}%")
|
2014-10-18 12:34:05 +08:00
|
|
|
|
end
|
2015-06-23 10:14:06 +08:00
|
|
|
|
end
|
2018-06-07 13:28:18 +08:00
|
|
|
|
end
|
2014-10-18 12:19:08 +08:00
|
|
|
|
|
2013-05-25 04:17:09 +08:00
|
|
|
|
@filters.each do |block, match|
|
|
|
|
|
if block.arity == 1
|
2015-06-23 10:14:06 +08:00
|
|
|
|
posts = instance_exec(posts, &block) || posts
|
2013-05-25 04:17:09 +08:00
|
|
|
|
else
|
|
|
|
|
posts = instance_exec(posts, match, &block) || posts
|
2018-06-07 13:28:18 +08:00
|
|
|
|
end
|
2013-05-25 02:03:45 +08:00
|
|
|
|
end if @filters
|
|
|
|
|
|
2013-05-25 04:17:09 +08:00
|
|
|
|
# If we have a search context, prioritize those posts first
|
2019-03-18 15:25:45 +08:00
|
|
|
|
posts =
|
|
|
|
|
if @search_context.present?
|
|
|
|
|
if @search_context.is_a?(User)
|
2021-10-26 15:16:38 +08:00
|
|
|
|
if type_filter == "private_messages"
|
2020-11-20 00:56:19 +08:00
|
|
|
|
if @guardian.is_admin? && !@search_all_pms
|
|
|
|
|
posts.private_posts_for_user(@search_context)
|
|
|
|
|
else
|
|
|
|
|
posts
|
|
|
|
|
end
|
2019-03-18 15:25:45 +08:00
|
|
|
|
else
|
|
|
|
|
posts.where("posts.user_id = #{@search_context.id}")
|
|
|
|
|
end
|
|
|
|
|
elsif @search_context.is_a?(Category)
|
|
|
|
|
category_ids =
|
|
|
|
|
Category
|
|
|
|
|
.where(parent_category_id: @search_context.id)
|
|
|
|
|
.pluck(:id)
|
|
|
|
|
.push(@search_context.id)
|
|
|
|
|
|
|
|
|
|
posts.where("topics.category_id in (?)", category_ids)
|
2021-10-22 10:38:21 +08:00
|
|
|
|
elsif is_topic_search
|
2022-11-02 03:05:13 +08:00
|
|
|
|
posts.where("topics.id = ?", @search_context.id).order(
|
2019-03-18 15:25:45 +08:00
|
|
|
|
"posts.post_number #{@order == :latest ? "DESC" : ""}",
|
|
|
|
|
)
|
2019-11-14 08:40:26 +08:00
|
|
|
|
elsif @search_context.is_a?(Tag)
|
|
|
|
|
posts =
|
|
|
|
|
posts.joins("LEFT JOIN topic_tags ON topic_tags.topic_id = topics.id").joins(
|
|
|
|
|
"LEFT JOIN tags ON tags.id = topic_tags.tag_id",
|
|
|
|
|
)
|
2022-11-02 03:05:13 +08:00
|
|
|
|
posts.where("tags.id = ?", @search_context.id)
|
2013-05-25 02:03:45 +08:00
|
|
|
|
end
|
2019-03-18 15:25:45 +08:00
|
|
|
|
else
|
2019-03-19 11:23:14 +08:00
|
|
|
|
posts = categories_ignored(posts) unless @category_filter_matched
|
|
|
|
|
posts
|
2013-05-25 02:03:45 +08:00
|
|
|
|
end
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
|
2020-04-27 22:35:27 +08:00
|
|
|
|
if @order == :latest
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
if aggregate_search
|
2021-02-01 13:40:06 +08:00
|
|
|
|
posts = posts.order("MAX(posts.created_at) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts = posts.reorder("posts.created_at DESC")
|
2014-08-23 04:55:19 +08:00
|
|
|
|
end
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
elsif @order == :latest_topic
|
|
|
|
|
if aggregate_search
|
2021-02-01 13:40:06 +08:00
|
|
|
|
posts = posts.order("MAX(topics.created_at) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts = posts.order("topics.created_at DESC")
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
end
|
|
|
|
|
elsif @order == :views
|
|
|
|
|
if aggregate_search
|
2021-02-01 13:40:06 +08:00
|
|
|
|
posts = posts.order("MAX(topics.views) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts = posts.order("topics.views DESC")
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
end
|
|
|
|
|
elsif @order == :likes
|
2021-02-01 13:40:06 +08:00
|
|
|
|
if aggregate_search
|
|
|
|
|
posts = posts.order("MAX(posts.like_count) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts = posts.order("posts.like_count DESC")
|
|
|
|
|
end
|
2021-10-22 10:38:21 +08:00
|
|
|
|
elsif !is_topic_search
|
2023-01-31 13:34:01 +08:00
|
|
|
|
exact_rank = nil
|
|
|
|
|
|
|
|
|
|
if SiteSetting.prioritize_exact_search_title_match
|
|
|
|
|
exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
rank = ts_rank_cd(weight_filter: weights)
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
if type_filter != "private_messages"
|
|
|
|
|
category_search_priority = <<~SQL
|
|
|
|
|
(
|
|
|
|
|
CASE categories.search_priority
|
|
|
|
|
WHEN #{Searchable::PRIORITIES[:very_high]}
|
|
|
|
|
THEN 3
|
|
|
|
|
WHEN #{Searchable::PRIORITIES[:very_low]}
|
|
|
|
|
THEN 1
|
|
|
|
|
ELSE 2
|
|
|
|
|
END
|
|
|
|
|
)
|
|
|
|
|
SQL
|
2020-12-23 15:14:41 +08:00
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
category_priority_weights = <<~SQL
|
|
|
|
|
(
|
|
|
|
|
CASE categories.search_priority
|
|
|
|
|
WHEN #{Searchable::PRIORITIES[:low]}
|
|
|
|
|
THEN #{SiteSetting.category_search_priority_low_weight}
|
|
|
|
|
WHEN #{Searchable::PRIORITIES[:high]}
|
|
|
|
|
THEN #{SiteSetting.category_search_priority_high_weight}
|
|
|
|
|
ELSE
|
|
|
|
|
CASE WHEN topics.closed
|
|
|
|
|
THEN 0.9
|
|
|
|
|
ELSE 1
|
|
|
|
|
END
|
2020-04-27 22:35:27 +08:00
|
|
|
|
END
|
2021-10-26 15:16:38 +08:00
|
|
|
|
)
|
|
|
|
|
SQL
|
2019-03-20 15:24:23 +08:00
|
|
|
|
|
2023-01-31 13:34:01 +08:00
|
|
|
|
posts =
|
|
|
|
|
if aggregate_search
|
|
|
|
|
posts.order("MAX(#{category_search_priority}) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts.order("#{category_search_priority} DESC")
|
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
if @term.present? && exact_rank
|
|
|
|
|
posts =
|
|
|
|
|
if aggregate_search
|
|
|
|
|
posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
|
|
|
|
|
else
|
|
|
|
|
posts.order("#{exact_rank} * #{category_priority_weights} DESC")
|
|
|
|
|
end
|
|
|
|
|
end
|
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
data_ranking =
|
|
|
|
|
if @term.blank?
|
|
|
|
|
"(#{category_priority_weights})"
|
|
|
|
|
else
|
|
|
|
|
"(#{rank} * #{category_priority_weights})"
|
|
|
|
|
end
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
posts =
|
|
|
|
|
if aggregate_search
|
2023-01-31 13:34:01 +08:00
|
|
|
|
posts.order("MAX(#{data_ranking}) DESC")
|
2021-10-26 15:16:38 +08:00
|
|
|
|
else
|
2023-01-31 13:34:01 +08:00
|
|
|
|
posts.order("#{data_ranking} DESC")
|
2021-10-26 15:16:38 +08:00
|
|
|
|
end
|
|
|
|
|
end
|
2013-05-23 02:36:14 +08:00
|
|
|
|
|
2021-02-01 13:40:06 +08:00
|
|
|
|
posts = posts.order("topics.bumped_at DESC")
|
2013-05-23 23:13:23 +08:00
|
|
|
|
end
|
|
|
|
|
|
2021-10-26 15:16:38 +08:00
|
|
|
|
if type_filter != "private_messages"
|
|
|
|
|
posts =
|
|
|
|
|
if secure_category_ids.present?
|
|
|
|
|
posts.where(
|
|
|
|
|
"(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
|
|
|
|
|
secure_category_ids,
|
|
|
|
|
).references(:categories)
|
|
|
|
|
else
|
|
|
|
|
posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
|
|
|
|
|
:categories,
|
|
|
|
|
)
|
|
|
|
|
end
|
|
|
|
|
end
|
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
|
|
|
|
|
2020-08-07 10:47:00 +08:00
|
|
|
|
if @order
|
|
|
|
|
advanced_order = Search.advanced_orders&.fetch(@order, nil)
|
|
|
|
|
posts = advanced_order.call(posts) if advanced_order
|
|
|
|
|
end
|
|
|
|
|
|
2017-08-01 03:28:48 +08:00
|
|
|
|
posts = posts.offset(offset)
|
2017-07-21 00:12:34 +08:00
|
|
|
|
posts.limit(limit)
|
2014-05-15 22:31:45 +08:00
|
|
|
|
end
|
|
|
|
|
|
2023-01-31 13:34:01 +08:00
|
|
|
|
# Builds the SQL fragment that ranks a row of post_search_data with
# Postgres' TS_RANK_CD.
#
# @param weight_filter [String, nil] passed through to #ts_query
# @param prefix_match [Boolean] passed through to #ts_query
# @return [String] a multi-line "TS_RANK_CD(...)" SQL expression
def ts_rank_cd(weight_filter:, prefix_match: true)
  # The weights array is an optional leading argument; it is emitted (with
  # its trailing comma) only when the site setting has a value.
  weights = SiteSetting.search_ranking_weights
  weights_arg = weights.present? ? "'#{weights}'," : ""

  # No tsquery is built for a blank term, which leaves the query argument
  # empty. NOTE(review): the fragment looks unusable as SQL in that case;
  # presumably callers only interpolate it when a term is present.
  query_arg =
    @term.blank? ? "" : ts_query(weight_filter: weight_filter, prefix_match: prefix_match)

  <<~SQL
    TS_RANK_CD(
      #{weights_arg}
      post_search_data.search_data,
      #{query_arg},
      #{SiteSetting.search_ranking_normalization}|32
    )
  SQL
end
|
|
|
|
|
|
2019-03-18 15:25:45 +08:00
|
|
|
|
# Narrows a posts relation so that rows whose category has the "ignore"
# search priority are excluded. Rows with a NULL search priority are kept.
#
# @param posts [#where] relation to filter
# @return whatever `posts.where` returns
def categories_ignored(posts)
  ignore_priority = Searchable::PRIORITIES[:ignore]

  posts.where(<<~SQL, ignore_priority)
    (categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
  SQL
end
|
|
|
|
|
|
2018-02-20 11:41:00 +08:00
|
|
|
|
# Returns the text search configuration from Search.ts_config wrapped in
# single quotes, ready to be interpolated into SQL as a string literal.
#
# @return [String] e.g. "'english'"
def self.default_ts_config
  "'#{Search.ts_config}'"
end
|
2015-08-10 15:41:14 +08:00
|
|
|
|
|
2017-08-01 03:28:48 +08:00
|
|
|
|
# Instance-level convenience that delegates to the class-level
# default_ts_config.
#
# @return [String] the value returned by the class method
def default_ts_config
  self.class.default_ts_config
end
|
2015-08-10 15:41:14 +08:00
|
|
|
|
|
2023-01-31 13:34:01 +08:00
|
|
|
|
# Builds a TO_TSQUERY SQL expression for `term`, first decorating the term
# via set_tsquery_weight_filter (weight filter and optional prefix match).
#
# @param term [String] the search term
# @param ts_config [String, nil] text search configuration, forwarded to to_tsquery
# @param joiner [String, nil] accepted but not forwarded to to_tsquery, so it
#   currently has no effect on the result
# @param weight_filter [String, nil] forwarded to set_tsquery_weight_filter
# @param prefix_match [Boolean] forwarded to set_tsquery_weight_filter
# @return [String] the value returned by to_tsquery
def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil, prefix_match: true)
  weighted_term = set_tsquery_weight_filter(term, weight_filter, prefix_match: prefix_match)

  to_tsquery(ts_config: ts_config, term: weighted_term)
end
|
|
|
|
|
|
|
|
|
|
# Builds a TO_TSQUERY(...) SQL expression for `term`.
#
# @param ts_config [String, nil] text search configuration; quoted through
#   the ActiveRecord connection when given, otherwise default_ts_config is used
# @param term [String] term to escape, single-quote and pass through wrap_unaccent
# @param joiner [String, nil] when given, every '&' in the text form of the
#   tsquery is replaced with this (escaped) string and the result is cast
#   back to tsquery
# @return [String] SQL expression
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
  ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
  config_sql = ts_config || default_ts_config
  term_sql = wrap_unaccent("'#{escape_string(term)}'")

  tsquery = "TO_TSQUERY(#{config_sql}, #{term_sql})"
  return tsquery unless joiner

  "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery"
end
|
2014-08-28 15:15:53 +08:00
|
|
|
|
|
2023-01-31 13:34:01 +08:00
|
|
|
|
# Wraps the escaped +term+ in single quotes and appends the tsquery
# suffix: a ':' followed by '*' when +prefix_match+ is truthy and then the
# +weight_filter+ (nil interpolates as an empty string).
def self.set_tsquery_weight_filter(term, weight_filter, prefix_match: true)
  prefix_marker = prefix_match ? "*" : ""
  quoted_term = "'#{self.escape_string(term)}'"

  "#{quoted_term}:#{prefix_marker}#{weight_filter}"
end
|
|
|
|
|
|
|
|
|
|
# Escapes +term+ for embedding in a quoted SQL/tsquery literal.
#
# The term goes through +PG::Connection.escape_string+ and afterwards every
# backslash in the result is doubled.
def self.escape_string(term)
  # HACK: The ’ and other similar characters have to be "unaccented" before
  # it is escaped or the resulting tsqueries will be invalid
  normalized =
    if SiteSetting.search_ignore_accents
      term.gsub(/[\u02b9\u02bb\u02bc\u02bd\u02c8\u2018\u2019\u201b\u2032\uff07]/, "'")
    else
      term
    end

  PG::Connection.escape_string(normalized).gsub('\\', '\\\\\\')
end
|
|
|
|
|
|
2023-01-31 13:34:01 +08:00
|
|
|
|
# Memoised wrapper around +Search.ts_query+ for this search's +@term+.
#
# Results are cached in +@ts_query_cache+, keyed by the effective
# configuration, the term, the weight filter and the prefix flag, so the
# SQL fragment is only built once per distinct combination.
def ts_query(ts_config = nil, weight_filter: nil, prefix_match: true)
  cache_key = "#{ts_config || default_ts_config} #{@term} #{weight_filter} #{prefix_match}"

  @ts_query_cache ||= {}
  @ts_query_cache[cache_key] ||= Search.ts_query(
    term: @term,
    ts_config: ts_config,
    weight_filter: weight_filter,
    prefix_match: prefix_match,
  )
end
|
2015-09-18 15:16:37 +08:00
|
|
|
|
|
2017-07-21 00:12:34 +08:00
|
|
|
|
# Wraps the SQL of +query+ in an outer SELECT that adds a +row_number+
# column, returning the resulting SQL string.
def wrap_rows(query)
  inner_sql = query.to_sql
  "SELECT *, row_number() over() row_number FROM (#{inner_sql}) xxx"
end
|
2014-09-01 15:04:40 +08:00
|
|
|
|
|
2016-08-13 01:04:46 +08:00
|
|
|
|
# Builds the SQL used to pick one post per topic for aggregated search.
#
# opts - Hash; only +opts[:type_filter]+ is read here and it is passed on to
#        +posts_query+.
#
# Returns a Hash of SQL strings produced by +wrap_rows+:
#   :default   - always present.
#   :remaining - only present when a positive minimum post id applies; it
#                covers posts below that id while :default covers the rest.
def aggregate_post_sql(opts)
  type_filter = opts[:type_filter]
  recent_offset = SiteSetting.search_recent_regular_posts_offset_post_id

  min_id =
    if recent_offset > 0
      # The offset only applies to regular topic searches. The filter value
      # used by the rest of this class is "private_messages" (plural); the
      # singular spelling is kept so existing callers keep working.
      if %w[all_topics private_messages private_message].include?(type_filter)
        0
      else
        recent_offset
      end
    else
      # This is kept around for backwards compatibility.
      # TODO: Drop this code path after Discourse 2.7 has been released.
      Search.min_post_id
    end

  query =
    if @order == :likes
      # likes are a pain to aggregate so skip
      posts_query(limit, type_filter: type_filter).select("topics.id", "posts.post_number")
    else
      min_or_max = @order == :latest ? "max" : "min"

      posts_query(limit, aggregate_search: true, type_filter: type_filter)
        .select("topics.id", "#{min_or_max}(posts.post_number) post_number")
        .group("topics.id")
    end

  if min_id > 0
    low_set = query.dup.where("post_search_data.post_id < ?", min_id)
    high_set = query.where("post_search_data.post_id >= ?", min_id)

    return { default: wrap_rows(high_set), remaining: wrap_rows(low_set) }
  end

  # double wrapping so we get correct row numbers
  { default: wrap_rows(query) }
end
|
2014-09-03 10:13:13 +08:00
|
|
|
|
|
2016-08-13 01:04:46 +08:00
|
|
|
|
# Joins the posts scope against the aggregated +post_sql+ sub-select so that
# only the (topic, post_number) pairs it selected are loaded, ordered by the
# sub-select's +row_number+.
#
# post_sql - SQL String, or nil/false in which case an empty Array is
#            returned.
def aggregate_posts(post_sql)
  if post_sql
    join_clause =
      "JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number"

    posts_scope(posts_eager_loads(Post)).joins(join_clause).order("row_number")
  else
    []
  end
end
|
2014-09-01 15:04:40 +08:00
|
|
|
|
|
2016-08-13 05:18:12 +08:00
|
|
|
|
# Runs the aggregated search and feeds the matching posts into +@results+.
#
# The posts from the :default SQL are added first; the :remaining SQL is
# only executed when fewer than +limit+ posts were added by the first pass.
def aggregate_search(opts = {})
  post_sql = aggregate_post_sql(opts)

  added =
    aggregate_posts(post_sql[:default]).reduce(0) do |count, post|
      @results.add(post)
      count + 1
    end

  return if added >= limit

  aggregate_posts(post_sql[:remaining]).each { |post| @results.add(post) }
end
|
2014-12-04 10:46:52 +08:00
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
|
# Aggregated search restricted to private messages.
#
# Raises Discourse::InvalidAccess when the guardian has no user.
def private_messages_search
  unless @guardian.user
    raise Discourse::InvalidAccess.new("anonymous can not search PMs")
  end

  aggregate_search({ type_filter: "private_messages" })
end
|
|
|
|
|
|
|
|
|
|
# Aggregated search using the "all_topics" type filter.
def all_topics_search
  aggregate_search({ type_filter: "all_topics" })
end
|
2013-02-06 03:16:51 +08:00
|
|
|
|
|
2017-05-31 07:14:09 +08:00
|
|
|
|
# Searches posts. When the search context is a Topic, only posts of that
# topic are queried and each one is added to +@results+; otherwise this
# falls back to the aggregated search.
def topic_search
  return aggregate_search unless @search_context.is_a?(Topic)

  scoped_posts = posts_scope(posts_eager_loads(posts_query(limit)))

  scoped_posts
    .where("posts.topic_id = ?", @search_context.id)
    .each { |post| @results.add(post) }
end
|
2017-05-31 07:14:09 +08:00
|
|
|
|
|
|
|
|
|
# Adds the eager-load declarations needed to render search results.
#
# Always includes the post's user and search data plus the topic's category;
# topic tags are included when tagging is enabled, followed by whatever
# +Search.custom_topic_eager_loads+ contributes. Each custom entry is either
# an Array used as-is or a callable invoked with +search_pms:+.
def posts_eager_loads(query)
  with_post_associations = query.includes(:user, :post_search_data)

  topic_associations = [:category]
  topic_associations.push(:tags) if SiteSetting.tagging_enabled

  custom_associations =
    Search.custom_topic_eager_loads.flat_map do |entry|
      entry.is_a?(Array) ? entry : entry.call(search_pms: @search_pms).to_a
    end

  with_post_associations.includes(topic: topic_associations + custom_associations)
end
|
|
|
|
|
|
PERF: Limit characters used to generate headline for search blurb.
We determined using the following benchmark script that limiting to 2500 chars would mean a maximum of
25ms spent generating headlines.
```
require 'benchmark/ips'
string = <<~STRING
Far far away, behind the word mountains...
STRING
def sql_excerpt(string, l = 1000000)
DB.query_single(<<~SQL)
SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
SQL
end
def ruby_excerpt(string)
output = DB.query_single("SELECT '#{string}'")[0]
Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100)
end
puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts
Benchmark.ips do |x|
x.time = 10
[1000, 2500, 5000, 10000, 20000, 50000].each do |l|
short_string = string[0..l]
x.report("ts_headline excerpt #{l}") do
sql_excerpt(short_string, l)
end
x.report("actionview excerpt #{l}") do
ruby_excerpt(short_string)
end
end
x.compare!
end
```
```
actionview excerpt 1000: 20570.7 i/s
actionview excerpt 2500: 17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000: 14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000: 6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000: 4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000: 3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500: 2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000: 1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000: 667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000: 348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000: 131.9 i/s - 155.91x (± 0.00) slower
```
2020-08-07 14:36:12 +08:00
|
|
|
|
# `TS_HEADLINE` is slow on long text documents, so the number of characters
# fed to it is capped at this value for performance reasons.
MAX_LENGTH_FOR_HEADLINE = 2_500
|
|
|
|
|
|
2020-08-06 14:15:31 +08:00
|
|
|
|
# Decorates +default_scope+ with PostgreSQL-generated excerpt columns.
#
# When the +use_pg_headlines_for_excerpt+ site setting is off the scope is
# returned untouched. Otherwise the scope is joined to +post_search_data+
# (alias pd) and +topics+ (alias t1) and selects, ahead of the scope's own
# projections:
#   topic_title_headline - the topic's fancy title with matches highlighted
#   headline             - a highlighted fragment taken from the first
#                          MAX_LENGTH_FOR_HEADLINE characters of the raw data
#   leading_raw_data     - first 50 characters of the raw data
#   trailing_raw_data    - last 50 characters of the raw data
def posts_scope(default_scope = Post.all)
  return default_scope unless SiteSetting.use_pg_headlines_for_excerpt

  escaped_term = @term.present? ? Search.escape_string(@term) : nil
  config = default_ts_config

  # Shared SQL snippets reused by both headline expressions below.
  headline_query = "PLAINTO_TSQUERY(#{config}, '#{escaped_term}')"
  highlight_tags = "StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>''"

  default_scope
    .joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
    .joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
    .select(
      "TS_HEADLINE(
        #{config},
        t1.fancy_title,
        #{headline_query},
        '#{highlight_tags}, HighlightAll=true'
      ) AS topic_title_headline",
      "TS_HEADLINE(
        #{config},
        LEFT(
          TS_HEADLINE(
            #{config},
            LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}),
            #{headline_query},
            'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel='''''
          ),
          #{Search::GroupedSearchResults::BLURB_LENGTH}
        ),
        #{headline_query},
        'HighlightAll=true, #{highlight_tags}'
      ) AS headline",
      "LEFT(pd.raw_data, 50) AS leading_raw_data",
      "RIGHT(pd.raw_data, 50) AS trailing_raw_data",
      default_scope.arel.projections,
    )
end
|
|
|
|
|
|
2021-11-18 09:21:12 +08:00
|
|
|
|
# Whether this search should be written to the search log.
#
# Logging requires the +log_search_queries+ site setting, a present
# +@opts[:search_type]+, a falsy +readonly_mode+ and a type filter other
# than "exclude_topics".
def log_query?(readonly_mode)
  loggable_search = SiteSetting.log_search_queries? && @opts[:search_type].present?

  loggable_search && !readonly_mode && @opts[:type_filter] != "exclude_topics"
end
|
2022-01-26 15:24:11 +08:00
|
|
|
|
|
|
|
|
|
# Minimum term length for this search.
#
# An explicit +@opts[:min_search_term_length]+ wins. Otherwise, when Chinese
# or Japanese tokenisation is enabled, the setting's default for the
# "zh_CN" or "ja" locale is used (Chinese is checked first); failing that
# the +min_search_term_length+ site setting applies.
def min_search_term_length
  override = @opts[:min_search_term_length]
  return override if override

  tokenizer_locale =
    if SiteSetting.search_tokenize_chinese
      "zh_CN"
    elsif SiteSetting.search_tokenize_japanese
      "ja"
    end

  if tokenizer_locale
    SiteSetting.defaults.get("min_search_term_length", tokenizer_locale)
  else
    SiteSetting.min_search_term_length
  end
end
|
2013-02-06 03:16:51 +08:00
|
|
|
|
end
|