require_dependency 'search/grouped_search_results'

# Full-text search over topics/posts, categories and users.
#
# Typical use is `Search.execute(term, opts)`, which returns a
# GroupedSearchResults (or nil when the term is blank / too short, or when the
# database rejects the query).
class Search

  # Number of results returned per facet when no type filter is given.
  def self.per_facet
    5
  end

  # Number of results returned when a type filter is given.
  def self.per_filter
    50
  end

  # Sometimes we want more topics than are returned due to exclusion of dupes. This is the
  # factor of extra results we'll ask for.
  def self.burst_factor
    3
  end

  # The valid type filters; each one maps to a private `<facet>_search` method.
  def self.facets
    %w(topic category user)
  end

  # Maps the site's default locale to a text search configuration name.
  def self.long_locale
    # if adding a language see:
    # /usr/share/postgresql/9.3/tsearch_data for possible options
    # Do not add languages that are missing without amending the
    # base docker config
    #
    case SiteSetting.default_locale.to_sym
      when :da     then 'danish'
      when :de     then 'german'
      when :en     then 'english'
      when :es     then 'spanish'
      when :fr     then 'french'
      when :it     then 'italian'
      when :nl     then 'dutch'
      when :nb_NO  then 'norwegian'
      when :pt     then 'portuguese'
      when :pt_BR  then 'portuguese'
      when :sv     then 'swedish'
      when :ru     then 'russian'
      else 'simple' # use the 'simple' stemmer for other languages
    end
  end

  # Re-indexes posts that have no search data row for the current default locale.
  #
  # Two passes are made: posts missing post_search_data, then first posts whose
  # topic is missing topic_search_data.
  #
  # limit - maximum number of posts handled by EACH pass (default 10000).
  #
  # Always returns nil.
  def self.rebuild_problem_posts(limit = 10000)
    posts = Post.joins(:topic)
                .where('posts.id IN ( ' \
                       'SELECT p2.id FROM posts p2 ' \
                       'LEFT JOIN post_search_data pd ON locale = ? AND p2.id = pd.post_id ' \
                       'WHERE pd.post_id IS NULL )', SiteSetting.default_locale)
                .limit(limit)
    reindex_posts(posts)

    posts = Post.joins(:topic)
                .where('posts.id IN ( ' \
                       'SELECT p2.id FROM posts p2 ' \
                       'LEFT JOIN topic_search_data pd ON locale = ? AND p2.topic_id = pd.topic_id ' \
                       'WHERE pd.topic_id IS NULL AND p2.post_number = 1 )', SiteSetting.default_locale)
                .limit(limit)
    reindex_posts(posts)

    nil
  end

  # Hands every post in the relation to SearchObserver.index after touching
  # its cooked text in memory (the post itself is not saved here).
  def self.reindex_posts(posts)
    posts.each do |post|
      # force indexing
      post.cooked += " "
      SearchObserver.index(post)
    end
  end
  private_class_method :reindex_posts

  # Normalizes text before it is indexed or searched.
  #
  # Whitespace is squished; for zh_TW / zh_CN / ja / ko default locales the raw
  # text is instead segmented with RMMSeg and the tokens joined by spaces.
  # The returned string is tagged as UTF-8.
  def self.prepare_data(search_data)
    data = search_data.squish
    # TODO rmmseg is designed for chinese, we need something else for Korean / Japanese
    if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale)
      # Lazy-load the segmenter: it is only needed for these locales.
      unless defined? RMMSeg
        require 'rmmseg'
        RMMSeg::Dictionary.load_dictionaries
      end

      algo = RMMSeg::Algorithm.new(search_data)

      data = ""
      while token = algo.next_token
        data << token.text << " "
      end
    end

    data.force_encoding("UTF-8")
    data
  end

  # term - the raw search string; `status:open`, `status:closed` and
  #        `order:latest` words are stripped out and applied as filters.
  # opts - optional Hash. Keys read in this class: :guardian, :search_context,
  #        :include_blurbs, :type_filter, :min_search_term_length,
  #        :search_for_id.
  def initialize(term, opts=nil)
    term = process_advanced_search!(term)
    if term.present?
      @term = Search.prepare_data(term.to_s)
      # Escaped copy for the one place the term is interpolated into SQL verbatim.
      @original_term = PG::Connection.escape_string(@term)
    end

    @opts = opts || {}
    @guardian = @opts[:guardian] || Guardian.new
    @search_context = @opts[:search_context]
    @include_blurbs = @opts[:include_blurbs] || false
    @limit = Search.per_facet
    if @opts[:type_filter].present?
      @limit = Search.per_filter
    end

    @results = GroupedSearchResults.new(@opts[:type_filter], term, @search_context, @include_blurbs)
  end

  # Convenience wrapper: builds a Search and runs it.
  def self.execute(term, opts=nil)
    self.new(term, opts).execute
  end

  # Query a term
  #
  # Returns nil when the term is blank or shorter than the minimum length,
  # otherwise the GroupedSearchResults.
  def execute
    return nil if @term.blank? || @term.length < (@opts[:min_search_term_length] || SiteSetting.min_search_term_length)

    # If the term is a number or url to a topic, just include that topic
    if @opts[:search_for_id] && @results.type_filter == 'topic'
      # \A / \z anchor the whole string; ^ / $ would match any single line.
      if @term =~ /\A\d+\z/
        single_topic(@term.to_i)
      else
        begin
          route = Rails.application.routes.recognize_path(@term)
          single_topic(route[:topic_id]) if route[:topic_id].present?
        rescue ActionController::RoutingError
          # Not a recognizable route: fall through to a regular search.
        end
      end
    end

    find_grouped_results unless @results.posts.present?

    @results
  end

  private

    # Pulls the advanced-search keywords out of the term, recording them in
    # @status / @order, and returns the remaining words joined by single spaces.
    def process_advanced_search!(term)
      term.to_s.split(/\s+/).map do |word|
        if word == 'status:open'
          @status = :open
          nil
        elsif word == 'status:closed'
          @status = :closed
          nil
        elsif word == 'order:latest'
          @order = :latest
          nil
        else
          word
        end
      end.compact.join(' ')
    end

    # Runs either the single filtered search or all facet searches, then tops
    # up with extra topics. Returns nil if the database rejects the statement.
    def find_grouped_results
      if @results.type_filter.present?
        # Whitelist check: the filter name is used to build a method name below.
        raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(@results.type_filter)
        send("#{@results.type_filter}_search")
      else
        @limit = Search.per_facet + 1
        unless @search_context
          user_search
          category_search
        end
        topic_search
      end

      add_more_topics_if_expected
      @results
    rescue ActiveRecord::StatementInvalid
      # In the event of a PG:Error return nothing, it is likely they used a foreign language whose
      # locale is not supported by postgres
    end

    # Add more topics if we expected them
    def add_more_topics_if_expected
      expected_topics = 0
      expected_topics = Search.facets.size unless @results.type_filter.present?
      expected_topics = Search.per_facet * Search.facets.size if @results.type_filter == 'topic'
      expected_topics -= @results.posts.length
      if expected_topics > 0
        # Over-fetch by burst_factor, then stop once enough posts were added.
        extra_posts = posts_query(expected_topics * Search.burst_factor)
        extra_posts = extra_posts.where("posts.topic_id NOT in (?)", @results.posts.map(&:topic_id)) if @results.posts.present?
        extra_posts.each do |post|
          @results.add(post)
          expected_topics -= 1
          break if expected_topics == 0
        end
      end
    end

    # If we're searching for a single topic
    #
    # Adds the topic's first post to the results when the guardian can see it;
    # returns nil otherwise.
    def single_topic(id)
      post = Post.find_by(topic_id: id, post_number: 1)
      return nil unless @guardian.can_see?(post)

      @results.add(post)
      @results
    end

    # Memoized list of secure category ids taken from the guardian.
    def secure_category_ids
      return @secure_category_ids unless @secure_category_ids.nil?
      @secure_category_ids = @guardian.secure_category_ids
    end

    # Adds matching categories (secured for the guardian) to the results.
    def category_search
      # scope is leaking onto Category, this is not good and probably a bug in Rails
      # the secure_category_ids will invoke the same method on User, it calls Category.where
      # however the scope from the query below is leaking in to Category, this works around
      # the issue while we figure out what is up in Rails
      secure_category_ids

      categories = Category.includes(:category_search_data)
                           .where("category_search_data.search_data @@ #{ts_query}")
                           .references(:category_search_data)
                           .order("topics_month DESC")
                           .secured(@guardian)
                           .limit(@limit)

      categories.each do |category|
        @results.add(category)
      end
    end

    # Adds matching users to the results; an exact username match sorts first,
    # then most recently posted.
    def user_search
      users = User.includes(:user_search_data)
                  .where("user_search_data.search_data @@ #{ts_query("simple")}")
                  .order("CASE WHEN username_lower = '#{@original_term.downcase}' THEN 0 ELSE 1 END")
                  .order("last_posted_at DESC")
                  .limit(@limit)
                  .references(:user_search_data)

      users.each do |user|
        @results.add(user)
      end
    end

    # Builds the relation of posts matching the term.
    #
    # limit - maximum number of rows.
    # opts  - optional Hash; :aggregate_search switches the ORDER BY clauses to
    #         aggregate forms (MAX / SUM) for use with a GROUP BY.
    def posts_query(limit, opts=nil)
      opts ||= {}
      posts = Post
                .joins(:post_search_data, {:topic => :category})
                .where("topics.deleted_at" => nil)
                .where("topics.visible")
                .where("topics.archetype <> ?", Archetype.private_message)

      if @search_context.present? && @search_context.is_a?(Topic)
        # Within a single topic a substring match on the raw post is used.
        posts = posts.where("posts.raw ilike ?", "%#{@term}%")
      else
        posts = posts.where("post_search_data.search_data @@ #{ts_query}")
      end

      if @status == :open
        posts = posts.where('NOT topics.closed AND NOT topics.archived')
      elsif @status == :closed
        posts = posts.where('topics.closed OR topics.archived')
      end

      # If we have a search context, prioritize those posts first
      if @search_context.present?

        if @search_context.is_a?(User)
          posts = posts.where("posts.user_id = ?", @search_context.id)
        elsif @search_context.is_a?(Category)
          posts = posts.where("topics.category_id = ?", @search_context.id)
        elsif @search_context.is_a?(Topic)
          posts = posts.where("topics.id = ?", @search_context.id)
                       .order("posts.post_number")
        end

      end

      if @order == :latest
        if opts[:aggregate_search]
          posts = posts.order("MAX(posts.created_at) DESC")
        else
          posts = posts.order("posts.created_at DESC")
        end
      else
        # Rank by title match first, then by post body match, then by recency.
        posts = posts.order("TS_RANK_CD(TO_TSVECTOR(#{query_locale}, topics.title), #{ts_query}) DESC")

        data_ranking = "TS_RANK_CD(post_search_data.search_data, #{ts_query})"
        if opts[:aggregate_search]
          posts = posts.order("SUM(#{data_ranking}) DESC")
        else
          posts = posts.order("#{data_ranking} DESC")
        end
        posts = posts.order("topics.bumped_at DESC")
      end

      if secure_category_ids.present?
        posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", secure_category_ids).references(:categories)
      else
        posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(:categories)
      end
      posts.limit(limit)
    end

    # SQL-quoted text search configuration name.
    # Memoized on the class, so it is computed once per process.
    def self.query_locale
      @query_locale ||= Post.sanitize(Search.long_locale)
    end

    def query_locale
      self.class.query_locale
    end

    # Builds a TO_TSQUERY(...) SQL fragment doing a prefix match on every word.
    #
    # term   - the search text; the characters * : ( ) & ! ' " are stripped.
    # locale - optional configuration name; defaults to query_locale.
    # joiner - tsquery operator placed between words (default "&").
    def self.ts_query(term, locale = nil, joiner = "&")
      locale = Post.sanitize(locale) if locale
      all_terms = term.gsub(/[*:()&!'"]/,'').squish.split
      query = Post.sanitize(all_terms.map {|t| "#{PG::Connection.escape_string(t)}:*"}.join(" #{joiner} "))
      "TO_TSQUERY(#{locale || query_locale}, #{query})"
    end

    # TO_TSQUERY fragment for this search's term. The default-locale form is
    # memoized; an explicit locale is rebuilt on every call.
    def ts_query(locale=nil)
      if !locale
        @ts_query ||= begin
          Search.ts_query(@term, locale)
        end
      else
        Search.ts_query(@term, locale)
      end
    end

    # Topic search that yields one post per topic: the lowest-numbered
    # matching post, in ranked order.
    def aggregate_search

      post_sql = posts_query(@limit, aggregate_search: true)
        .select('topics.id', 'min(post_number) post_number')
        .group('topics.id')
        .to_sql

      # double wrapping so we get correct row numbers
      post_sql = "SELECT *, row_number() over() row_number FROM (#{post_sql}) xxx"

      posts = Post.includes(:topic => :category)
                  .joins("JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number")
                  .order('row_number')

      posts.each do |post|
        @results.add(post)
      end
    end

    # Within a Topic context every matching post is returned; otherwise
    # results are aggregated to one post per topic.
    def topic_search
      if @search_context.is_a?(Topic)
        posts = posts_query(@limit).where('posts.topic_id = ?', @search_context.id).includes(:topic => :category)
        posts.each do |post|
          @results.add(post)
        end
      else
        aggregate_search
      end
    end

end