From f25849501d34bf4bf650cc3fe0ab95b9cfce1c0b Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Tue, 12 Sep 2023 16:21:01 +1000
Subject: [PATCH] FEATURE: allow consumers to parse a search string (#23528)

This extends search so it can have consumers that:

1. Can split off "term" from various advanced filters and orders
2. Can build a relation of either order or filter

It also moves a lot of stuff around in the search class for clarity.

Two new APIs are exposed:

`.apply_filter` to apply all the special filters to a posts/topics relation
`.apply_order` to force a particular order (eg: order:latest)

This can then be used by semantic search in Discourse AI
---
 lib/search.rb           | 243 ++++++++++++++++++++++------------------
 spec/lib/search_spec.rb |  17 +++
 2 files changed, 153 insertions(+), 107 deletions(-)

diff --git a/lib/search.rb b/lib/search.rb
index 748cc9bd991..3183f756afe 100644
--- a/lib/search.rb
+++ b/lib/search.rb
@@ -745,6 +745,71 @@ class Search
     posts.where("topics.views <= ?", match.to_i)
   end
 
+  def apply_filters(posts)
+    @filters.each do |block, match|
+      if block.arity == 1
+        posts = instance_exec(posts, &block) || posts
+      else
+        posts = instance_exec(posts, match, &block) || posts
+      end
+    end if @filters
+    posts
+  end
+
+  def apply_order(
+    posts,
+    aggregate_search: false,
+    allow_relevance_search: true,
+    type_filter: "all_topics"
+  )
+    if @order == :latest
+      if aggregate_search
+        posts = posts.order("MAX(posts.created_at) DESC")
+      else
+        posts = posts.reorder("posts.created_at DESC")
+      end
+    elsif @order == :oldest
+      if aggregate_search
+        posts = posts.order("MAX(posts.created_at) ASC")
+      else
+        posts = posts.reorder("posts.created_at ASC")
+      end
+    elsif @order == :latest_topic
+      if aggregate_search
+        posts = posts.order("MAX(topics.created_at) DESC")
+      else
+        posts = posts.order("topics.created_at DESC")
+      end
+    elsif @order == :oldest_topic
+      if aggregate_search
+        posts = posts.order("MAX(topics.created_at) ASC")
+      else
+        posts = posts.order("topics.created_at ASC")
+      end
+    elsif @order == :views
+      if aggregate_search
+        posts = posts.order("MAX(topics.views) DESC")
+      else
+        posts = posts.order("topics.views DESC")
+      end
+    elsif @order == :likes
+      if aggregate_search
+        posts = posts.order("MAX(posts.like_count) DESC")
+      else
+        posts = posts.order("posts.like_count DESC")
+      end
+    elsif allow_relevance_search
+      posts = sort_by_relevance(posts, type_filter: type_filter, aggregate_search: aggregate_search)
+    end
+
+    if @order
+      advanced_order = Search.advanced_orders&.fetch(@order, nil)
+      posts = advanced_order.call(posts) if advanced_order
+    end
+
+    posts
+  end
+
   private
 
   def search_tags(posts, match, positive:)
@@ -1059,11 +1124,6 @@ class Search
             "%#{term_without_quote}%",
           )
       else
-        # A is for title
-        # B is for category
-        # C is for tags
-        # D is for cooked
-        weights = @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
         posts = posts.where(post_number: 1) if @in_title
         posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
         exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten
@@ -1075,13 +1135,7 @@ class Search
       end
     end
 
-    @filters.each do |block, match|
-      if block.arity == 1
-        posts = instance_exec(posts, &block) || posts
-      else
-        posts = instance_exec(posts, match, &block) || posts
-      end
-    end if @filters
+    posts = apply_filters(posts)
 
     # If we have a search context, prioritize those posts first
     posts =
@@ -1120,53 +1174,51 @@ class Search
         posts
       end
 
-    if @order == :latest
-      if aggregate_search
-        posts = posts.order("MAX(posts.created_at) DESC")
-      else
-        posts = posts.reorder("posts.created_at DESC")
-      end
-    elsif @order == :oldest
-      if aggregate_search
-        posts = posts.order("MAX(posts.created_at) ASC")
-      else
-        posts = posts.reorder("posts.created_at ASC")
-      end
-    elsif @order == :latest_topic
-      if aggregate_search
-        posts = posts.order("MAX(topics.created_at) DESC")
-      else
-        posts = posts.order("topics.created_at DESC")
-      end
-    elsif @order == :oldest_topic
-      if aggregate_search
-        posts = posts.order("MAX(topics.created_at) ASC")
-      else
-        posts = posts.order("topics.created_at ASC")
-      end
-    elsif @order == :views
-      if aggregate_search
-        posts = posts.order("MAX(topics.views) DESC")
-      else
-        posts = posts.order("topics.views DESC")
-      end
-    elsif @order == :likes
-      if aggregate_search
-        posts = posts.order("MAX(posts.like_count) DESC")
-      else
-        posts = posts.order("posts.like_count DESC")
-      end
-    elsif !is_topic_search
-      exact_rank = nil
+    if type_filter != "private_messages"
+      posts =
+        if secure_category_ids.present?
+          posts.where(
+            "(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
+            secure_category_ids,
+          ).references(:categories)
+        else
+          posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
+            :categories,
+          )
+        end
+    end
 
-      if SiteSetting.prioritize_exact_search_title_match
-        exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
-      end
+    posts =
+      apply_order(
+        posts,
+        aggregate_search: aggregate_search,
+        allow_relevance_search: !is_topic_search,
+        type_filter: type_filter,
+      )
 
-      rank = ts_rank_cd(weight_filter: weights)
+    posts = posts.offset(offset)
+    posts.limit(limit)
+  end
 
-      if type_filter != "private_messages"
-        category_search_priority = <<~SQL
+  def weights
+    # A is for title
+    # B is for category
+    # C is for tags
+    # D is for cooked
+    @in_title ? "A" : (SiteSetting.tagging_enabled ? "ABCD" : "ABD")
+  end
+
+  def sort_by_relevance(posts, type_filter:, aggregate_search:)
+    exact_rank = nil
+
+    if SiteSetting.prioritize_exact_search_title_match
+      exact_rank = ts_rank_cd(weight_filter: "A", prefix_match: false)
+    end
+
+    rank = ts_rank_cd(weight_filter: weights)
+
+    if type_filter != "private_messages"
+      category_search_priority = <<~SQL
         (
           CASE categories.search_priority
           WHEN #{Searchable::PRIORITIES[:very_high]}
@@ -1178,16 +1230,16 @@ class Search
         )
         SQL
 
-        rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]]
+      rank_sort_priorities = [["topics.archived", 0.85], ["topics.closed", 0.9]]
 
-        rank_sort_priorities =
-          DiscoursePluginRegistry.apply_modifier(
-            :search_rank_sort_priorities,
-            rank_sort_priorities,
-            self,
-          )
+      rank_sort_priorities =
+        DiscoursePluginRegistry.apply_modifier(
+          :search_rank_sort_priorities,
+          rank_sort_priorities,
+          self,
+        )
 
-        category_priority_weights = <<~SQL
+      category_priority_weights = <<~SQL
           (
             CASE categories.search_priority
               WHEN #{Searchable::PRIORITIES[:low]}
@@ -1204,61 +1256,38 @@ class Search
           )
         SQL
 
-        posts =
-          if aggregate_search
-            posts.order("MAX(#{category_search_priority}) DESC")
-          else
-            posts.order("#{category_search_priority} DESC")
-          end
-
-        if @term.present? && exact_rank
-          posts =
-            if aggregate_search
-              posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
-            else
-              posts.order("#{exact_rank} * #{category_priority_weights} DESC")
-            end
+      posts =
+        if aggregate_search
+          posts.order("MAX(#{category_search_priority}) DESC")
+        else
+          posts.order("#{category_search_priority} DESC")
         end
 
-        data_ranking =
-          if @term.blank?
-            "(#{category_priority_weights})"
-          else
-            "(#{rank} * #{category_priority_weights})"
-          end
-
+      if @term.present? && exact_rank
         posts =
           if aggregate_search
-            posts.order("MAX(#{data_ranking}) DESC")
+            posts.order("MAX(#{exact_rank} * #{category_priority_weights}) DESC")
           else
-            posts.order("#{data_ranking} DESC")
+            posts.order("#{exact_rank} * #{category_priority_weights} DESC")
           end
       end
 
-      posts = posts.order("topics.bumped_at DESC")
-    end
-
-    if type_filter != "private_messages"
-      posts =
-        if secure_category_ids.present?
-          posts.where(
-            "(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))",
-            secure_category_ids,
-          ).references(:categories)
+      data_ranking =
+        if @term.blank?
+          "(#{category_priority_weights})"
         else
-          posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(
-            :categories,
-          )
+          "(#{rank} * #{category_priority_weights})"
+        end
+
+      posts =
+        if aggregate_search
+          posts.order("MAX(#{data_ranking}) DESC")
+        else
+          posts.order("#{data_ranking} DESC")
         end
     end
 
-    if @order
-      advanced_order = Search.advanced_orders&.fetch(@order, nil)
-      posts = advanced_order.call(posts) if advanced_order
-    end
-
-    posts = posts.offset(offset)
-    posts.limit(limit)
+    posts.order("topics.bumped_at DESC")
   end
 
   def ts_rank_cd(weight_filter:, prefix_match: true)
diff --git a/spec/lib/search_spec.rb b/spec/lib/search_spec.rb
index b7efa888f23..774b127d1e3 100644
--- a/spec/lib/search_spec.rb
+++ b/spec/lib/search_spec.rb
@@ -2748,4 +2748,21 @@ RSpec.describe Search do
       expect(result.posts.pluck(:id)).to eq([post1.id, post2.id])
     end
   end
+
+  describe "Extensibility features of search" do
+    it "is possible to parse queries" do
+      term = "hello l status:closed"
+      search = Search.new(term)
+
+      posts = Post.all.includes(:topic)
+      posts = search.apply_filters(posts)
+      posts = search.apply_order(posts)
+
+      sql = posts.to_sql
+
+      expect(search.term).to eq("hello")
+      expect(sql).to include("ORDER BY posts.created_at DESC")
+      expect(sql).to match(/where.*topics.closed/i)
+    end
+  end
 end