mirror of
https://github.com/discourse/discourse.git
synced 2025-03-22 22:36:07 +08:00
FIX: Remap postgres text search proximity operator (#25497)
Why this change? Since 1dba1aca271d20fec88e9e6e890c8f87320ff32f, we have been remapping the `<->` proximity operator in a tsquery to `&`. However, there is another variant of it which follows the `<N>` pattern. For example, the text "end-to-end" will eventually result in the tsquery `end-to-end:* <-> end:* <2> end:*` being generated by Postgres. Before this fix, we remapped that tsquery to `end-to-end:* & end:* <2> end:*`. This requires the search data which we store to contain `end` terms exactly 2 positions apart. Due to the way we limit the number of duplicates in our search data, the search term may end up not matching anything. In bd32912c5ed7eebcda32721849a1e006d02b119b, we made it such that we do not allow any duplicates when indexing a topic's title. Therefore, searching for `end-to-end` against a topic title containing `end-to-end` will never match because our index will only contain one `end` term. What does this change do? We will also remap the `<N>` variant of the proximity operator to `&`.
This commit is contained in:
parent
f2ac9e4c12
commit
e61608d080
@ -1333,7 +1333,7 @@ class Search
|
||||
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, #{escaped_term})"
|
||||
# PG 14 and up default to using the followed by operator
|
||||
# this restores the old behavior
|
||||
tsquery = "REPLACE(#{tsquery}::text, '<->', '&')::tsquery"
|
||||
tsquery = "REGEXP_REPLACE(#{tsquery}::text, '<->|<\\d+>', '&', 'g')::tsquery"
|
||||
tsquery = "REPLACE(#{tsquery}::text, '&', '#{escape_string(joiner)}')::tsquery" if joiner
|
||||
tsquery
|
||||
end
|
||||
|
@ -1025,6 +1025,14 @@ RSpec.describe Search do
|
||||
results = Search.execute("tiger", guardian: Guardian.new(user))
|
||||
expect(results.posts).to eq([post])
|
||||
end
|
||||
|
||||
it "does not rely on postgres's proximity opreators" do
|
||||
topic.update!(title: "End-to-end something something testing")
|
||||
|
||||
results = Search.execute("end-to-end test")
|
||||
|
||||
expect(results.posts).to eq([post])
|
||||
end
|
||||
end
|
||||
|
||||
describe "topics" do
|
||||
@ -2375,13 +2383,19 @@ RSpec.describe Search do
|
||||
|
||||
it "escapes the term correctly" do
|
||||
expect(Search.ts_query(term: 'Title with trailing backslash\\')).to eq(
|
||||
"REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->', '&')::tsquery",
|
||||
"REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing backslash\\\\\\\\'':*')::text, '<->|<\\d+>', '&', 'g')::tsquery",
|
||||
)
|
||||
|
||||
expect(Search.ts_query(term: "Title with trailing quote'")).to eq(
|
||||
"REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->', '&')::tsquery",
|
||||
"REGEXP_REPLACE(TO_TSQUERY('english', '''Title with trailing quote'''''':*')::text, '<->|<\\d+>', '&', 'g')::tsquery",
|
||||
)
|
||||
end
|
||||
|
||||
it "remaps postgres's proximity operators '<->' and its `<N>` variant" do
|
||||
expect(
|
||||
DB.query_single("SELECT #{Search.ts_query(term: "end-to-end")}::text"),
|
||||
).to contain_exactly("'end-to-end':* & 'end':* & 'end':*")
|
||||
end
|
||||
end
|
||||
|
||||
describe "#word_to_date" do
|
||||
|
Loading…
x
Reference in New Issue
Block a user