FIX: Make sure generated tsqueries are valid

The tsquery used for searching is generated using functions from both
Ruby and PostgreSQL (for example, the unaccent function). Depending on
the search term, the generated tsquery could be invalid. For example,
"can’t" generated "''can''t''" instead of "''can''''t''".
This commit is contained in:
Bianca Nenciu 2022-12-12 17:57:20 +02:00 committed by GitHub
parent 19214aff18
commit 17b7ab0d7b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 14 deletions

@ -1168,28 +1168,18 @@ class Search
self.class.default_ts_config self.class.default_ts_config
end end
def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil) def self.ts_query(term:, ts_config: nil, joiner: nil, weight_filter: nil)
to_tsquery( to_tsquery(
ts_config: ts_config, ts_config: ts_config,
term: set_tsquery_weight_filter(term, weight_filter), term: set_tsquery_weight_filter(term, weight_filter),
joiner: joiner
) )
end end
def self.to_tsquery(ts_config: nil, term:, joiner: nil) def self.to_tsquery(ts_config: nil, term:, joiner: nil)
ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
escaped_term = wrap_unaccent("'#{escape_string(term)}'")
# unaccent can be used only when a joiner is present because the tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})"
# additional processing and the final conversion to tsquery does not tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner
# work well with characters that are converted to quotes by unaccent.
if joiner
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{self.escape_string(term)}')"
tsquery = "REPLACE(#{tsquery}::text, '&', '#{self.escape_string(joiner)}')::tsquery"
else
escaped_term = Search.wrap_unaccent("'#{self.escape_string(term)}'")
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})"
end
tsquery tsquery
end end
@ -1198,6 +1188,10 @@ class Search
end end
def self.escape_string(term) def self.escape_string(term)
# HACK: The ’ has to be "unaccented" before it is escaped or the resulting
# tsqueries will be invalid
term = term.gsub("\u{2019}", "'") if SiteSetting.search_ignore_accents
PG::Connection.escape_string(term).gsub('\\', '\\\\\\') PG::Connection.escape_string(term).gsub('\\', '\\\\\\')
end end

@ -115,6 +115,37 @@ RSpec.describe Search do
end end
end end
context "with apostrophes" do
fab!(:post_1) { Fabricate(:post, raw: "searching for: John's") }
fab!(:post_2) { Fabricate(:post, raw: "searching for: Johns") }
before do
SearchIndexer.enable
end
after do
SearchIndexer.disable
end
it "returns correct results" do
SiteSetting.search_ignore_accents = false
[post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) }
expect(Search.execute("John's").posts).to contain_exactly(post_1, post_2)
expect(Search.execute("John’s").posts).to contain_exactly(post_1, post_2)
expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2)
end
it "returns correct results with accents" do
SiteSetting.search_ignore_accents = true
[post_1, post_2].each { |post| SearchIndexer.index(post.topic, force: true) }
expect(Search.execute("John's").posts).to contain_exactly(post_1, post_2)
expect(Search.execute("John’s").posts).to contain_exactly(post_1, post_2)
expect(Search.execute("Johns").posts).to contain_exactly(post_1, post_2)
end
end
describe "custom_eager_load" do describe "custom_eager_load" do
fab!(:topic) { Fabricate(:topic) } fab!(:topic) { Fabricate(:topic) }
fab!(:post) { Fabricate(:post, topic: topic) } fab!(:post) { Fabricate(:post, topic: topic) }