From fb780c50fd8522fe497ace05ded10a7950eaa0b3 Mon Sep 17 00:00:00 2001 From: Bianca Nenciu <nbianca@users.noreply.github.com> Date: Mon, 9 Jan 2023 19:19:51 +0200 Subject: [PATCH] FIX: Replace all quote-like unicodes with quotes (#19714) If unaccent is called with quote-like Unicode characters then it can generate invalid queries because some of the quotes transformed by unaccent are not escaped and to_tsquery fails because of bad input. This commit replaces more quote-like Unicode characters before unaccent is called. --- lib/search.rb | 8 +++++--- spec/models/topic_spec.rb | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index 95e1c69fe67..e81b7ab0d93 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1230,9 +1230,11 @@ class Search end def self.escape_string(term) - # HACK: The ’ has to be "unaccented" before it is escaped or the resulting - # tsqueries will be invalid - term = term.gsub("\u{2019}", "'") if SiteSetting.search_ignore_accents + # HACK: The ’ and other similar characters have to be "unaccented" before + # it is escaped or the resulting tsqueries will be invalid + if SiteSetting.search_ignore_accents + term = term.gsub(/[\u02b9\u02bb\u02bc\u02bd\u02c8\u2018\u2019\u201b\u2032\uff07]/, "'") + end PG::Connection.escape_string(term).gsub('\\', '\\\\\\') end diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index d60f7b83db7..4db4f3a5546 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -645,6 +645,12 @@ RSpec.describe Topic do expect(Topic.similar_to("怎么上自己的", "")).to eq([]) end + it "does not result in invalid statement when title contains unicode characters" do + SiteSetting.search_ignore_accents = true + + expect(Topic.similar_to("'bad quotes'", "'bad quotes'")).to eq([]) + end + context "with a similar topic" do fab!(:post) do SearchIndexer.enable