2019-05-03 06:17:27 +08:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
require "sanitize"
|
|
|
|
|
2013-05-24 02:26:51 +08:00
|
|
|
class Search
|
|
|
|
class GroupedSearchResults
|
2014-09-02 17:15:08 +08:00
|
|
|
include ActiveModel::Serialization
|
2013-05-24 02:26:51 +08:00
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
# Makes ActionView's text helpers callable as class-level methods
# (GroupedSearchResults.blurb_for uses TextHelper.excerpt and
# TextHelper.truncate) without mixing them into the results object itself.
class TextHelper
  extend ActionView::Helpers::TextHelper
end
|
|
|
|
|
2017-07-17 23:57:13 +08:00
|
|
|
# Read-only state. The result lists are filled through #add; the remaining
# values are set in #initialize or flipped by #add when a limit is reached.
attr_reader(
  :type_filter,
  :posts,
  :categories,
  :extra_categories,
  :users,
  :tags,
  :groups,
  :more_posts,
  :more_categories,
  :more_users,
  :term,
  :search_context,
  :more_full_page_results,
  :error,
  :use_pg_headlines_for_excerpt,
  :can_lazy_load_categories,
)

# Readable and writable from outside; this class never assigns it itself.
attr_accessor :search_log_id

# Default blurb length used when the caller does not pass +blurb_length+.
BLURB_LENGTH = 200
|
|
|
|
|
2022-05-24 23:31:24 +08:00
|
|
|
# Builds an empty result container for one search.
#
# @param type_filter stored as-is and exposed through the +type_filter+ reader
# @param term stored as-is; also used as the blurb term when +blurb_term+ is not given
# @param search_context stored as-is and exposed through the +search_context+ reader
# @param blurb_length length handed to the blurb builder; BLURB_LENGTH when nil
# @param blurb_term term handed to the blurb builder in #blurb; +term+ when nil
# @param is_header_search selects which limit #add enforces (per facet vs. per filter)
# @param use_pg_headlines_for_excerpt defaults to the site setting of the same name
# @param can_lazy_load_categories when truthy, #add also collects categories
#   into +extra_categories+
def initialize(
  type_filter:,
  term:,
  search_context:,
  blurb_length: nil,
  blurb_term: nil,
  is_header_search: false,
  use_pg_headlines_for_excerpt: SiteSetting.use_pg_headlines_for_excerpt,
  can_lazy_load_categories: false
)
  # Search parameters.
  @type_filter = type_filter
  @term = term
  @blurb_term = blurb_term || term
  @search_context = search_context
  @blurb_length = blurb_length || BLURB_LENGTH

  # Result buckets, filled by #add.
  @posts = []
  @categories = []
  @extra_categories = Set.new
  @users = []
  @tags = []
  @groups = []

  # Status and behaviour switches.
  @error = nil
  @is_header_search = is_header_search
  @use_pg_headlines_for_excerpt = use_pg_headlines_for_excerpt
  @can_lazy_load_categories = can_lazy_load_categories
end
|
|
|
|
|
|
|
|
# Stores +error+ on the result set; it is read back through the +error+ reader.
def error=(error)
  @error = error
end
|
|
|
|
|
2016-03-18 13:26:20 +08:00
|
|
|
# Attaches per-user topic data to the topic of every collected post.
#
# Does nothing (and returns nil) when +guardian+ has no user. Otherwise the
# topics of all posts are looked up in one TopicUser.lookup_for call and each
# topic's +user_data+ is set to its entry in that lookup (nil when absent).
#
# @param guardian object responding to +user+
# @return [Array, nil] the topics that were processed, or nil without a user
def find_user_data(guardian)
  user = guardian.user
  return unless user

  topics = @posts.map(&:topic)
  topic_lookup = TopicUser.lookup_for(user, topics)
  topics.each { |topic| topic.user_data = topic_lookup[topic.id] }
end
|
|
|
|
|
2020-08-12 15:33:26 +08:00
|
|
|
# Marker that #blurb puts before/after a headline that does not start/end
# with the post's leading/trailing raw data.
OMISSION = "..."

# Matches a <span> carrying the search highlight CSS class (optionally with
# other attributes before or after it) and captures the text inside, so the
# highlight markup can be replaced by its plain content.
SCRUB_HEADLINE_REGEXP =
  %r{<span(?: \w+="[^"]+")* class="#{Search::HIGHLIGHT_CSS_CLASS}"(?: \w+="[^"]+")*>([^<]*)</span>}
|
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
# Builds the excerpt for +post+.
#
# Indexed search data is used when the post's search data version is present
# and at least SearchIndexer::MIN_POST_BLURB_INDEX_VERSION, and neither
# Chinese nor Japanese segmentation is enabled:
# * with +use_pg_headlines_for_excerpt+, the post's headline is returned
#   directly, wrapped in OMISSION where it does not reach the start/end of
#   the raw data;
# * otherwise the indexed raw data is passed to
#   GroupedSearchResults.blurb_for with scrubbing disabled.
# In every other case the post's cooked content is passed to blurb_for.
#
# @param post object exposing +post_search_data+, +cooked+ and, for the
#   headline path, +headline+, +leading_raw_data+ and +trailing_raw_data+
# @return the headline-based string, or whatever blurb_for returns
def blurb(post)
  opts = { term: @blurb_term, blurb_length: @blurb_length }
  post_search_data_version = post&.post_search_data&.version

  use_indexed_data =
    post_search_data_version.present? &&
      post_search_data_version >= SearchIndexer::MIN_POST_BLURB_INDEX_VERSION &&
      !Search.segment_chinese? && !Search.segment_japanese?

  if use_indexed_data
    if use_pg_headlines_for_excerpt
      # Highlight spans are stripped only for the edge comparison below; the
      # returned string still contains the original headline markup.
      scrubbed_headline = post.headline.gsub(SCRUB_HEADLINE_REGEXP, '\1')
      prefix_omission = scrubbed_headline.start_with?(post.leading_raw_data) ? "" : OMISSION
      postfix_omission = scrubbed_headline.end_with?(post.trailing_raw_data) ? "" : OMISSION
      return "#{prefix_omission}#{post.headline}#{postfix_omission}"
    else
      opts[:cooked] = post.post_search_data.raw_data
      opts[:scrub] = false
    end
  else
    opts[:cooked] = post.cooked
  end

  GroupedSearchResults.blurb_for(**opts)
end
|
2013-05-24 02:26:51 +08:00
|
|
|
|
2014-09-02 17:15:08 +08:00
|
|
|
# Adds +object+ to the result list matching its class name (downcased and
# pluralized, e.g. the list returned by +posts+ for a Post).
#
# When that list already holds the applicable limit, the object is NOT added
# and a "more results" flag is set instead:
# * header search: limit is Search.per_facet, flag is @more_<type>
# * otherwise:     limit is Search.per_filter, flag is @more_full_page_results
#
# When +can_lazy_load_categories+ is truthy, the category of the object (via
# its topic for posts) and that category's parent, if any, are added to
# +extra_categories+ regardless of whether the object itself was added.
#
# @param object a search result whose class name maps to one of the readers
# @return [Set, nil] +extra_categories+ when a category was collected, else nil
def add(object)
  type = object.class.to_s.downcase.pluralize
  results = public_send(type)
  limit = @is_header_search ? Search.per_facet : Search.per_filter

  if results.length == limit
    if @is_header_search
      instance_variable_set("@more_#{type}", true)
    else
      @more_full_page_results = true
    end
  else
    results << object
  end

  return unless can_lazy_load_categories

  category =
    case type
    when "posts"
      object.topic.category
    when "topics"
      object.category
    end

  return unless category

  extra_categories << category.parent_category if category.parent_category
  extra_categories << category
end
|
|
|
|
|
2020-07-17 16:27:30 +08:00
|
|
|
# Produces a sanitized excerpt of +cooked+.
#
# @param cooked text to excerpt; passed through
#   SearchIndexer::HtmlScrubber.scrub first when +scrub+ is true
# @param term search term to centre the excerpt on; when it matches
#   Search::PHRASE_MATCH_REGEXP_PATTERN its first capture group is used
# @param blurb_length excerpt radius is half of this; also the truncation
#   length when no excerpt around +term+ could be produced
# @param scrub when true, scrub +cooked+ and replace URLs whose path
#   extension matches Oneboxer::VIDEO_REGEX / Oneboxer::AUDIO_REGEX with the
#   "search.video" / "search.audio" translations
# @return the result of Sanitize.clean on the excerpt
def self.blurb_for(cooked: nil, term: nil, blurb_length: BLURB_LENGTH, scrub: true)
  blurb = nil

  if scrub
    cooked = SearchIndexer::HtmlScrubber.scrub(cooked)

    # Collect each distinct URL once so it is only substituted a single time.
    urls = Set.new
    cooked.scan(Discourse::Utils::URI_REGEXP) { urls << Regexp.last_match(0) }

    urls.each do |url|
      begin
        case File.extname(URI(url).path || "")
        when Oneboxer::VIDEO_REGEX
          cooked.gsub!(url, I18n.t("search.video"))
        when Oneboxer::AUDIO_REGEX
          cooked.gsub!(url, I18n.t("search.audio"))
        end
      rescue URI::InvalidURIError
        # Deliberate best effort: a URL that cannot be parsed stays in the text.
      end
    end
  end

  if term
    term = Regexp.last_match(1) if term =~ Regexp.new(Search::PHRASE_MATCH_REGEXP_PATTERN)

    blurb = TextHelper.excerpt(cooked, term, radius: blurb_length / 2)
  end

  # No term, or no excerpt around the term: fall back to the start of the text.
  blurb = TextHelper.truncate(cooked, length: blurb_length) if blurb.blank?
  Sanitize.clean(blurb)
end
|
2013-05-24 02:26:51 +08:00
|
|
|
end
|
2014-02-17 11:34:14 +08:00
|
|
|
end
|