From e61608d0808362b7e49953df27335a5067a412dc Mon Sep 17 00:00:00 2001 From: Alan Guo Xiang Tan Date: Thu, 1 Feb 2024 07:20:46 +0800 Subject: [PATCH] FIX: Remap postgres text search proximity operator (#25497) Why this change? Since 1dba1aca271d20fec88e9e6e890c8f87320ff32f, we have been remapping the `<->` proximity operator in a tsquery to `&`. However, there is another variant of it which follows the `<\d+>` pattern. For example, the following text "end-to-end" will eventually result in the following tsquery `end-to-end:* <-> end:* <2> end:*` being generated by Postgres. Before this fix, the tsquery is remapped to `end-to-end:* & end:* <2> end:*` by us. This requires the search data which we store to contain `end` at exactly 2 positions apart. Due to the way we limit the number of duplicates in our search data, the search term may end up not matching anything. In bd32912c5ed7eebcda32721849a1e006d02b119b, we made it such that we do not allow any duplicates when indexing a topic's title. Therefore, a search for `end-to-end` against a topic title with `end-to-end` will never match because our index will only contain one `end` term. What does this change do? We will remap the `<\d+>` variant of the proximity operator to `&` as well. 
--- lib/search.rb | 2 +- spec/lib/search_spec.rb | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/lib/search.rb b/lib/search.rb index d1669d6d227..88379952939 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -1333,7 +1333,7 @@ class Search tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})" # PG 14 and up default to using the followed by operator # this restores the old behavior - tsquery = "REPLACE(#{tsquery}::text, '<->', '&')::tsquery" + tsquery = "REGEXP_REPLACE(#{tsquery}::text, '<->|<\\d+>', '&', 'g')::tsquery" tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner tsquery end diff --git a/spec/lib/search_spec.rb b/spec/lib/search_spec.rb index c938e0275f3..3e8d9d592fd 100644 --- a/spec/lib/search_spec.rb +++ b/spec/lib/search_spec.rb @@ -1025,6 +1025,14 @@ RSpec.describe Search do results = Search.execute("tiger", guardian: Guardian.new(user)) expect(results.posts).to eq([post]) end + + it "does not rely on postgres's proximity opreators" do + topic.update!(title: "End-to-end something something testing") + + results = Search.execute("end-to-end test") + + expect(results.posts).to eq([post]) + end end describe "topics" do @@ -2375,13 +2383,19 @@ RSpec.describe Search do it "escapes the term correctly" do expect(Search.ts_query(term: 'Title with trailing backslash\\')).to eq( - "REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->', '&')::tsquery", + "REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->|<\\d+>', '&', 'g')::tsquery", ) expect(Search.ts_query(term: "Title with trailing quote'")).to eq( - "REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->', '&')::tsquery", + "REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->|<\\d+>', '&', 'g')::tsquery", ) end + + it "remaps postgres's proximity operators '<->' and its `` 
variant" do + expect( + DB.query_single("SELECT #{Search.ts_query(term: "end-to-end")}::text"), + ).to contain_exactly("'end-to-end':* & 'end':* & 'end':*") + end end describe "#word_to_date" do