From 14cf8eacf1a679c08ea7df93aff17949d1a9c4df Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Thu, 2 Feb 2023 13:35:04 -0300 Subject: [PATCH] FEATURE: Use similarity in user search (#20112) Currently, when doing `@mention` for users we have 0 tolerance for typos and misspellings. With this patch, if a user search doesn't return enough results we go and use `pg_trgm` features to try and find more matches based on trigrams of usernames and names. It also introduces GiST indexes on those fields in order to improve performance of this search, going from 130ms down to 15ms in my tests. This is all gated in a feature flag and can be enabled by running `SiteSetting.user_search_similar_results = true` in the rails console. --- app/models/user.rb | 18 ++++++----- app/models/user_search.rb | 28 +++++++++++++++++ config/site_settings.yml | 3 ++ ...0201192925_add_trigram_indexes_to_users.rb | 24 ++++++++++++++ spec/models/user_search_spec.rb | 31 +++++++++++++++++++ 5 files changed, 96 insertions(+), 8 deletions(-) create mode 100644 db/migrate/20230201192925_add_trigram_indexes_to_users.rb diff --git a/app/models/user.rb b/app/models/user.rb index 5bc5304b2fd..729e1740fb1 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -2222,12 +2222,14 @@ end # # Indexes # -# idx_users_admin (id) WHERE admin -# idx_users_moderator (id) WHERE moderator -# index_users_on_last_posted_at (last_posted_at) -# index_users_on_last_seen_at (last_seen_at) -# index_users_on_secure_identifier (secure_identifier) UNIQUE -# index_users_on_uploaded_avatar_id (uploaded_avatar_id) -# index_users_on_username (username) UNIQUE -# index_users_on_username_lower (username_lower) UNIQUE +# idx_users_admin (id) WHERE admin +# idx_users_moderator (id) WHERE moderator +# index_users_on_last_posted_at (last_posted_at) +# index_users_on_last_seen_at (last_seen_at) +# index_users_on_name_trgm (name) USING gist +# index_users_on_secure_identifier (secure_identifier) UNIQUE +# 
index_users_on_uploaded_avatar_id (uploaded_avatar_id) +# index_users_on_username (username) UNIQUE +# index_users_on_username_lower (username_lower) UNIQUE +# index_users_on_username_lower_trgm (username_lower) USING gist # diff --git a/app/models/user_search.rb b/app/models/user_search.rb index 7b21ff6b7fe..b89b896ffbd 100644 --- a/app/models/user_search.rb +++ b/app/models/user_search.rb @@ -160,6 +160,8 @@ class UserSearch .each { |id| users << id } end + return users.to_a if users.size >= @limit + # 5. last seen users (for search auto-suggestions) if @last_seen_users scoped_users @@ -169,6 +171,32 @@ class UserSearch .each { |id| users << id } end + return users.to_a if users.size >= @limit + + if SiteSetting.user_search_similar_results + # 6. similar usernames + if @term.present? + scoped_users + .where("username_lower <-> ? < 1", @term) + .order(["username_lower <-> ? ASC", @term]) + .limit(@limit - users.size) + .pluck(:id) + .each { |id| users << id } + end + + return users.to_a if users.size >= @limit + + # 7. similar names + if SiteSetting.enable_names? && @term.present? + scoped_users + .where("name <-> ? < 1", @term) + .order(["name <-> ? 
ASC", @term]) + .limit(@limit - users.size) + .pluck(:id) + .each { |id| users << id } + end + end + users.to_a end diff --git a/config/site_settings.yml b/config/site_settings.yml index 39a26c0af35..2683a943fb6 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -2194,6 +2194,9 @@ backups: client: true search: + user_search_similar_results: + default: false + hidden: true prioritize_exact_search_title_match: default: false hidden: true diff --git a/db/migrate/20230201192925_add_trigram_indexes_to_users.rb b/db/migrate/20230201192925_add_trigram_indexes_to_users.rb new file mode 100644 index 00000000000..0b2c78dfa08 --- /dev/null +++ b/db/migrate/20230201192925_add_trigram_indexes_to_users.rb @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +class AddTrigramIndexesToUsers < ActiveRecord::Migration[7.0] + disable_ddl_transaction! + + def change + add_index( + :users, + :username_lower, + using: "gist", + opclass: :gist_trgm_ops, + algorithm: :concurrently, + name: "index_users_on_username_lower_trgm", + ) + add_index( + :users, + :name, + using: "gist", + opclass: :gist_trgm_ops, + algorithm: :concurrently, + name: "index_users_on_name_trgm", + ) + end +end diff --git a/spec/models/user_search_spec.rb b/spec/models/user_search_spec.rb index e7d58fce44d..dc91ef7f046 100644 --- a/spec/models/user_search_spec.rb +++ b/spec/models/user_search_spec.rb @@ -267,4 +267,35 @@ RSpec.describe UserSearch do expect(results[2]).to eq("mrorange") end end + + context "when using SiteSetting.user_search_similar_results" do + it "should find the user even with a typo if the setting is enabled" do + rafael = Fabricate(:user, username: "rafael", name: "Rafael Silva") + codinghorror = Fabricate(:user, username: "codinghorror", name: "Jeff Atwood") + pfaffman = Fabricate(:user, username: "pfaffman") + zogstrip = Fabricate(:user, username: "zogstrip", name: "Régis Hanol") + + SiteSetting.user_search_similar_results = false + expect(UserSearch.new("rafel").search).to 
be_blank + expect(UserSearch.new("codding").search).to be_blank + expect(UserSearch.new("pffman").search).to be_blank + + SiteSetting.user_search_similar_results = true + expect(UserSearch.new("rafel").search).to include(rafael) + expect(UserSearch.new("codding").search).to include(codinghorror) + expect(UserSearch.new("pffman").search).to include(pfaffman) + + SiteSetting.user_search_similar_results = false + expect(UserSearch.new("silvia").search).to be_blank + expect(UserSearch.new("atwod").search).to be_blank + expect(UserSearch.new("regis").search).to be_blank + expect(UserSearch.new("reg").search).to be_blank + + SiteSetting.user_search_similar_results = true + expect(UserSearch.new("silvia").search).to include(rafael) + expect(UserSearch.new("atwod").search).to include(codinghorror) + expect(UserSearch.new("regis").search).to include(zogstrip) + expect(UserSearch.new("reg").search).to include(zogstrip) + end + end end