mirror of
https://github.com/discourse/discourse.git
synced 2025-01-19 07:12:45 +08:00
FEATURE: Use similarity in user search (#20112)
Currently, when `@mention`-ing users, we have zero tolerance for typos and misspellings. With this patch, if a user search doesn't return enough results, we fall back to `pg_trgm` to try to find more matches based on trigrams of usernames and names. It also introduces GiST indexes on those fields to improve the performance of this search, going from 130ms down to 15ms in my tests. This is all gated behind a feature flag and can be enabled by running `SiteSetting.user_search_similar_results = true` in the Rails console.
This commit is contained in:
parent
ca2b2d034f
commit
14cf8eacf1
|
@ -2222,12 +2222,14 @@ end
|
|||
#
|
||||
# Indexes
|
||||
#
|
||||
# idx_users_admin (id) WHERE admin
|
||||
# idx_users_moderator (id) WHERE moderator
|
||||
# index_users_on_last_posted_at (last_posted_at)
|
||||
# index_users_on_last_seen_at (last_seen_at)
|
||||
# index_users_on_secure_identifier (secure_identifier) UNIQUE
|
||||
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
|
||||
# index_users_on_username (username) UNIQUE
|
||||
# index_users_on_username_lower (username_lower) UNIQUE
|
||||
# idx_users_admin (id) WHERE admin
|
||||
# idx_users_moderator (id) WHERE moderator
|
||||
# index_users_on_last_posted_at (last_posted_at)
|
||||
# index_users_on_last_seen_at (last_seen_at)
|
||||
# index_users_on_name_trgm (name) USING gist
|
||||
# index_users_on_secure_identifier (secure_identifier) UNIQUE
|
||||
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
|
||||
# index_users_on_username (username) UNIQUE
|
||||
# index_users_on_username_lower (username_lower) UNIQUE
|
||||
# index_users_on_username_lower_trgm (username_lower) USING gist
|
||||
#
|
||||
|
|
|
@ -160,6 +160,8 @@ class UserSearch
|
|||
.each { |id| users << id }
|
||||
end
|
||||
|
||||
return users.to_a if users.size >= @limit
|
||||
|
||||
# 5. last seen users (for search auto-suggestions)
|
||||
if @last_seen_users
|
||||
scoped_users
|
||||
|
@ -169,6 +171,32 @@ class UserSearch
|
|||
.each { |id| users << id }
|
||||
end
|
||||
|
||||
return users.to_a if users.size >= @limit
|
||||
|
||||
if SiteSetting.user_search_similar_results
|
||||
# 6. similar usernames
|
||||
if @term.present?
|
||||
scoped_users
|
||||
.where("username_lower <-> ? < 1", @term)
|
||||
.order(["username_lower <-> ? ASC", @term])
|
||||
.limit(@limit - users.size)
|
||||
.pluck(:id)
|
||||
.each { |id| users << id }
|
||||
end
|
||||
|
||||
return users.to_a if users.size >= @limit
|
||||
|
||||
# 7. similar names
|
||||
if SiteSetting.enable_names? && @term.present?
|
||||
scoped_users
|
||||
.where("name <-> ? < 1", @term)
|
||||
.order(["name <-> ? ASC", @term])
|
||||
.limit(@limit - users.size)
|
||||
.pluck(:id)
|
||||
.each { |id| users << id }
|
||||
end
|
||||
end
|
||||
|
||||
users.to_a
|
||||
end
|
||||
|
||||
|
|
|
@ -2194,6 +2194,9 @@ backups:
|
|||
client: true
|
||||
|
||||
search:
|
||||
user_search_similar_results:
|
||||
default: false
|
||||
hidden: true
|
||||
prioritize_exact_search_title_match:
|
||||
default: false
|
||||
hidden: true
|
||||
|
|
24
db/migrate/20230201192925_add_trigram_indexes_to_users.rb
Normal file
24
db/migrate/20230201192925_add_trigram_indexes_to_users.rb
Normal file
|
@ -0,0 +1,24 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Adds GiST trigram (`gist_trgm_ops`) indexes on `users.username_lower` and
# `users.name`.
class AddTrigramIndexesToUsers < ActiveRecord::Migration[7.0]
  # The indexes below are built with `algorithm: :concurrently`, so the
  # migration opts out of the wrapping DDL transaction.
  disable_ddl_transaction!

  # Indexed column => index name, in creation order.
  TRIGRAM_INDEXES = {
    username_lower: "index_users_on_username_lower_trgm",
    name: "index_users_on_name_trgm",
  }.freeze

  def change
    TRIGRAM_INDEXES.each do |column, index_name|
      add_index(
        :users,
        column,
        using: "gist",
        opclass: :gist_trgm_ops,
        algorithm: :concurrently,
        name: index_name,
      )
    end
  end
end
|
|
@ -267,4 +267,35 @@ RSpec.describe UserSearch do
|
|||
expect(results[2]).to eq("mrorange")
|
||||
end
|
||||
end
|
||||
|
||||
context "when using SiteSetting.user_search_similar_results" do
  it "should find the user even with a typo if the setting is enabled" do
    rafael = Fabricate(:user, username: "rafael", name: "Rafael Silva")
    codinghorror = Fabricate(:user, username: "codinghorror", name: "Jeff Atwood")
    pfaffman = Fabricate(:user, username: "pfaffman")
    zogstrip = Fabricate(:user, username: "zogstrip", name: "Régis Hanol")

    # Search term => user expected to be found once the setting is on.
    # First table: misspelled usernames. Second table: terms aimed at the
    # users' full names.
    username_typos = { "rafel" => rafael, "codding" => codinghorror, "pffman" => pfaffman }
    name_typos = {
      "silvia" => rafael,
      "atwod" => codinghorror,
      "regis" => zogstrip,
      "reg" => zogstrip,
    }

    [username_typos, name_typos].each do |expected_by_term|
      # With the flag off, none of the terms return anything.
      SiteSetting.user_search_similar_results = false
      expected_by_term.each_key do |term|
        expect(UserSearch.new(term).search).to be_blank
      end

      # With the flag on, each term surfaces its intended user.
      SiteSetting.user_search_similar_results = true
      expected_by_term.each do |term, user|
        expect(UserSearch.new(term).search).to include(user)
      end
    end
  end
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user