From 17b7ab0d7bd66bfea62dd10f62874b2d50ed14d8 Mon Sep 17 00:00:00 2001 From: Bianca Nenciu Date: Mon, 12 Dec 2022 17:57:20 +0200 Subject: [PATCH] FIX: Make sure generated tsqueries are valid (#19368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tsquery used for searching is generated using functions from both Ruby and PostgreSQL (for example, the unaccent function). Depending on the term used, the generated tsquery could be invalid. For example, "can’t" generated "''can''t''" instead of "''can''''t''". --- lib/search.rb | 22 ++++++++-------------- spec/lib/search_spec.rb | 31 +++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 1eeaa06202f..4f983b3aa3e 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1168,28 +1168,18 @@ class Search self.class.default_ts_config end - def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil) + def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil) to_tsquery( ts_config: ts_config, term: set_tsquery_weight_filter(term, weight_filter), - joiner: joiner ) end def self.to_tsquery(ts_config: nil, term:, joiner: nil) ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config - - # unaccent can be used only when a joiner is present because the - # additional processing and the final conversion to tsquery does not - # work well with characters that are converted to quotes by unaccent. 
- if joiner - tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{self.escape_string(term)}')" - tsquery = "REPLACE(#{tsquery}::text, '&', '#{self.escape_string(joiner)}')::tsquery" - else - escaped_term = Search.wrap_unaccent("'#{self.escape_string(term)}'") - tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})" - end - + escaped_term = wrap_unaccent("'#{escape_string(term)}'") + tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})" + tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner tsquery end @@ -1198,6 +1188,10 @@ class Search end def self.escape_string(term) + # HACK: The ’ has to be "unaccented" before it is escaped or the resulting + # tsqueries will be invalid + term = term.gsub("\u{2019}", "'") if SiteSetting.search_ignore_accents + PG::Connection.escape_string(term).gsub('\\', '\\\\\\') end diff --git a/spec/lib/search_spec.rb b/spec/lib/search_spec.rb index 5087760fb4d..c4ca2171c85 100644 --- a/spec/lib/search_spec.rb +++ b/spec/lib/search_spec.rb @@ -115,6 +115,37 @@ RSpec.describe Search do end end + context "with apostrophes" do + fab!(:post_1) { Fabricate(:post, raw: "searching for: John's") } + fab!(:post_2) { Fabricate(:post, raw: "searching for: Johns") } + + before do + SearchIndexer.enable + end + + after do + SearchIndexer.disable + end + + it "returns correct results" do + SiteSetting.search_ignore_accents = false + [post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) } + + expect(Search.execute("John's").posts).to contain_exactly(post_1, post_2) + expect(Search.execute("John’s").posts).to contain_exactly(post_1, post_2) + expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2) + end + + it "returns correct results with accents" do + SiteSetting.search_ignore_accents = true + [post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) } + + expect(Search.execute("John's").posts).to 
contain_exactly(post_1, post_2) + expect(Search.execute("John’s").posts).to contain_exactly(post_1, post_2) + expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2) + end + end + describe "custom_eager_load" do fab!(:topic) { Fabricate(:topic) } fab!(:post) { Fabricate(:post, topic: topic) }