2013-03-01 03:52:35 +08:00
# encoding: utf-8
2019-04-30 08:27:42 +08:00
# frozen_string_literal: true
2013-03-01 03:52:35 +08:00
2015-10-11 17:41:23 +08:00
require 'rails_helper'
2013-02-06 03:16:51 +08:00
describe Search do
2019-05-06 16:52:31 +08:00
# Admin account shared by the examples in this file.
fab!(:admin) { Fabricate(:admin) }

# Switch the search indexer on before every example.
before do
  SearchIndexer.enable
end
2020-07-17 11:12:31 +08:00
# Checks what ends up in a post's search data and when posts are flagged
# for reindexing (their search-data version moves from POST_INDEX_VERSION
# to REINDEX_VERSION).
context 'post indexing' do
  fab!(:category) { Fabricate(:category_with_definition, name: 'america') }
  fab!(:topic) { Fabricate(:topic, title: 'sam saffron test topic', category: category) }
  let!(:post) { Fabricate(:post, topic: topic, raw: 'this <b>fun test</b> <img src="bla" title="my image">') }
  let!(:post2) { Fabricate(:post, topic: topic) }

  it "should index correctly" do
    search_data = post.post_search_data.search_data

    # Terms from the post body, the topic title and the category name
    # are all expected in the post's search data.
    expect(search_data).to match(/fun/)
    expect(search_data).to match(/sam/)
    expect(search_data).to match(/america/)

    # Retitling the topic flags the other post of the topic for reindexing.
    expect do
      topic.update!(title: "harpi is the new title")
    end.to change { post2.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)

    expect(post.post_search_data.reload.search_data).to match(/harpi/)
  end

  it 'should update posts index when topic category changes' do
    expect do
      topic.update!(category: Fabricate(:category))
    end.to change { post.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)
      .and change { post2.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)
  end

  it 'should update posts index when topic tags changes' do
    SiteSetting.tagging_enabled = true
    tag = Fabricate(:tag)

    expect do
      DiscourseTagging.tag_topic_by_names(topic, Guardian.new(admin), [tag.name])
      topic.save!
    end.to change { post.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)
      .and change { post2.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)

    expect(topic.tags).to eq([tag])
  end
end
2020-07-17 11:12:31 +08:00
# Checks that a user's search data contains terms from username and name.
context 'user indexing' do
  before do
    @user = Fabricate(:user, username: 'fred', name: 'bob jones')
    @indexed = @user.user_search_data.search_data
  end

  it "should pick up on data" do
    expect(@indexed).to match(/fred/)
    # NOTE(review): matches "jone" rather than "jones" - presumably because
    # the indexed terms are stemmed; confirm against the indexer.
    expect(@indexed).to match(/jone/)
  end
end
2020-07-17 11:12:31 +08:00
# Checks category search data and that renaming a category flags only the
# posts of that category's topics for reindexing.
context 'category indexing' do
  let!(:category) { Fabricate(:category_with_definition, name: 'america') }
  let!(:topic) { Fabricate(:topic, category: category) }
  let!(:post) { Fabricate(:post, topic: topic) }
  let!(:post2) { Fabricate(:post, topic: topic) }
  # Fabricated without the topic above; used as the "untouched" control.
  let!(:post3) { Fabricate(:post) }

  it "should index correctly" do
    expect(category.category_search_data.search_data).to match(/america/)
  end

  it 'should update posts index when category name changes' do
    expect do
      category.update!(name: 'some new name')
    end.to change { post.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)
      .and change { post2.reload.post_search_data.version }.from(SearchIndexer::POST_INDEX_VERSION).to(SearchIndexer::REINDEX_VERSION)

    # The unrelated post keeps its current index version.
    expect(post3.post_search_data.version).to eq(SearchIndexer::POST_INDEX_VERSION)
  end
end
2017-06-07 14:23:48 +08:00
# A term with zero-width characters embedded in it must be cleaned to the
# plain word by Search.
it 'strips zero-width characters from search terms' do
  # "capy" + U+200B, U+200C, U+200D, U+FEFF + "bara"
  term = "\u0063\u0061\u0070\u0079\u200b\u200c\u200d\ufeff\u0062\u0061\u0072\u0061".encode("UTF-8")

  # Sanity check: the raw input is not already equal to the clean word.
  expect(term == 'capybara').to eq(false)

  search = Search.new(term)

  expect(search.valid?).to eq(true)
  expect(search.term).to eq('capybara')
  expect(search.clean_term).to eq('capybara')
end
2017-12-12 13:47:28 +08:00
# Typographic (curly) double quotes in the input are expected to come back
# as plain ASCII double quotes in both `term` and `clean_term`.
it 'replaces curly quotes to regular quotes in search terms' do
  curly_term = '“discourse”'
  straight_term = '"discourse"'

  # Sanity check: the two spellings really are different strings.
  expect(curly_term == straight_term).to eq(false)

  search = Search.new(curly_term)

  expect(search.valid?).to eq(true)
  expect(search.term).to eq(straight_term)
  expect(search.clean_term).to eq(straight_term)
end
2013-03-07 23:52:01 +08:00
# A single term shorter than min_search_term_length makes the search
# invalid and leaves the term blank.
it 'does not search when the search term is too small' do
  search = Search.new('evil', min_search_term_length: 5)
  search.execute

  expect(search.valid?).to eq(false)
  expect(search.term).to eq('')
end
2016-08-11 23:52:53 +08:00
# Several short terms do not add up: none of them reaches the minimum
# length, so the search is invalid.
it 'needs at least one term that hits the length' do
  search = Search.new('a b c d', min_search_term_length: 5)
  search.execute

  expect(search.valid?).to eq(false)
  expect(search.term).to eq('')
end
# The same short words wrapped in double quotes are accepted and the term
# is kept verbatim, quotes included.
it 'searches for quoted short terms' do
  search = Search.new('"a b c d"', min_search_term_length: 5)
  search.execute

  expect(search.valid?).to eq(true)
  expect(search.term).to eq('"a b c d"')
end
2016-08-11 23:52:53 +08:00
# One sufficiently long word is enough to make the whole term valid; the
# short words stay part of the term.
it 'searches for short terms if one hits the length' do
  search = Search.new('a b c okaylength', min_search_term_length: 5)
  search.execute

  expect(search.valid?).to eq(true)
  expect(search.term).to eq('a b c okaylength')
end
2013-02-06 03:16:51 +08:00
# Punctuation-heavy input must not blow up and simply yields no posts.
it 'escapes non alphanumeric characters' do
  expect(Search.execute('foo :!$);}]>@\#\"\'').posts.length).to eq(0) # There are at least three levels of sanitation for Search.query!
end
# Single quotes inside the term must be handled without raising.
it "doesn't raise an error when single quotes are present" do
  expect(Search.execute("'hello' world").posts.length).to eq(0) # There are at least three levels of sanitation for Search.query!
end
# A plain two-word query executes without raising.
it 'works when given two terms with spaces' do
  expect { Search.execute('evil trout') }.not_to raise_error
end
# User search, including the effect of the hide_user_profiles_from_public
# site setting on anonymous and logged-in searches.
context 'users' do
  # NOTE(review): the 'bruce' query presumably matches the default name or
  # username produced by the :user fabricator - confirm in the fabricators.
  let!(:user) { Fabricate(:user) }
  let(:result) { Search.execute('bruce', type_filter: 'user') }

  it 'returns a result' do
    expect(result.users.length).to eq(1)
    expect(result.users[0].id).to eq(user.id)
  end

  context 'hiding user profiles' do
    before { SiteSetting.hide_user_profiles_from_public = true }

    it 'returns no result for anon' do
      expect(result.users.length).to eq(0)
    end

    it 'returns a result for logged in users' do
      # Named differently from the `result` let so it does not shadow it.
      logged_in_result = Search.execute('bruce', type_filter: 'user', guardian: Guardian.new(user))
      expect(logged_in_result.users.length).to eq(1)
    end
  end
end
2014-10-25 07:20:41 +08:00
# Inactive accounts must not show up in search results.
context 'inactive users' do
  let!(:inactive_user) { Fabricate(:inactive_user, active: false) }
  let(:result) { Search.execute('bruce') }

  it 'does not return a result' do
    expect(result.users.length).to eq(0)
  end
end
2015-11-19 04:06:59 +08:00
# Searching for a staged account by its exact username returns no users.
context 'staged users' do
  let(:staged_user) do
    Fabricate(:staged)
  end

  let(:result) do
    Search.execute(staged_user.username)
  end

  it 'does not return a result' do
    found_users = result.users
    expect(found_users.length).to eq(0)
  end
end
2014-12-04 10:46:52 +08:00
context 'private messages' do
2020-08-24 11:53:07 +08:00
# First post of a private message; its topic hosts the reply below.
let!(:post) { Fabricate(:private_message_post) }

let(:topic) { post.topic }

# Reply in the same private message by the same user, mentioning "mars".
let!(:reply) do
  Fabricate(:private_message_post,
    topic: post.topic,
    raw: 'hello from mars, we just landed',
    user: post.user
  )
end

# A separate private-message post (no topic given) that also mentions "mars".
let!(:post2) do
  Fabricate(:private_message_post,
    raw: 'another secret pm from mars, testing'
  )
end
2014-12-04 10:46:52 +08:00
2020-09-10 11:37:18 +08:00
# An admin searching private messages without a user context gets no
# posts back.
it 'searches correctly as an admin' do
  results = Search.execute(
    'mars',
    type_filter: 'private_messages',
    guardian: Guardian.new(admin)
  )

  expect(results.posts).to eq([])
end
2014-12-04 10:46:52 +08:00
2020-09-10 11:37:18 +08:00
# With another user passed as search_context, an admin finds that user's
# matching private-message post.
it "searches correctly as an admin given another user's context" do
  results = Search.execute(
    'mars',
    type_filter: 'private_messages',
    search_context: reply.user,
    guardian: Guardian.new(admin)
  )

  expect(results.posts).to contain_exactly(reply)
end
# A regular user may not search private messages in another user's context.
it "raises the right error when a normal user searches for another user's context" do
  expect do
    Search.execute(
      'mars',
      search_context: reply.user,
      type_filter: 'private_messages',
      guardian: Guardian.new(Fabricate(:user))
    )
  end.to raise_error(Discourse::InvalidAccess)
end
2015-02-19 09:56:49 +08:00
2020-09-10 11:37:18 +08:00
# The author of the reply finds it; the "mars" post from the other private
# message is not returned.
it 'searches correctly as a user' do
  results = Search.execute(
    'mars',
    type_filter: 'private_messages',
    guardian: Guardian.new(reply.user)
  )

  expect(results.posts).to contain_exactly(reply)
end
2014-12-04 10:46:52 +08:00
2020-09-10 11:37:18 +08:00
# A freshly fabricated user has no private messages, so nothing is found.
it 'searches correctly for a user with no private messages' do
  results = Search.execute(
    'mars',
    type_filter: 'private_messages',
    guardian: Guardian.new(Fabricate(:user))
  )

  expect(results.posts).to eq([])
end
# End-to-end private-message search: anonymous access is rejected, the
# participant finds the reply by type filter and by topic context, and a
# member of an allowed group finds it with the `in:personal` filter.
it 'searches correctly' do
  # No guardian given: searching private messages is refused.
  expect do
    Search.execute('mars', type_filter: 'private_messages')
  end.to raise_error(Discourse::InvalidAccess)

  results = Search.execute(
    'mars',
    type_filter: 'private_messages',
    guardian: Guardian.new(reply.user)
  )

  expect(results.posts).to contain_exactly(reply)

  results = Search.execute(
    'mars',
    search_context: topic,
    guardian: Guardian.new(reply.user)
  )

  expect(results.posts).to contain_exactly(reply)

  # can search group PMs as well as non admin
  #
  user = Fabricate(:user)
  group = Fabricate.build(:group)
  group.add(user)
  group.save!

  TopicAllowedGroup.create!(group_id: group.id, topic_id: topic.id)

  # The filter keyword is exercised in lower and upper case.
  ["mars in:personal", "mars IN:PERSONAL"].each do |query|
    results = Search.execute(query, guardian: Guardian.new(user))
    expect(results.posts).to contain_exactly(reply)
  end
end
2020-08-24 11:53:07 +08:00
# The `personal_messages:<username>` filter: refused for regular users,
# usable by admins, and empty for an unknown username.
context 'personal_messages filter' do
  it 'does not allow a normal user to search for personal messages of another user' do
    expect do
      Search.execute(
        "mars personal_messages:#{post.user.username}",
        guardian: Guardian.new(Fabricate(:user))
      )
    end.to raise_error(Discourse::InvalidAccess)
  end

  it 'searches correctly for the PM of the given user' do
    results = Search.execute(
      "mars personal_messages:#{post.user.username}",
      guardian: Guardian.new(admin)
    )

    expect(results.posts).to contain_exactly(reply)
  end

  it 'returns the right results if username is invalid' do
    results = Search.execute(
      "mars personal_messages:random_username",
      guardian: Guardian.new(admin)
    )

    expect(results.posts).to eq([])
  end
end
2019-07-22 22:55:49 +08:00
context 'personal-direct flag' do
# The searching user (an admin) and two other users to exchange PMs with.
let(:current) { Fabricate(:user, admin: true, username: "current_user") }
let(:participant) { Fabricate(:user, username: "participant_1") }
let(:participant_2) { Fabricate(:user, username: "participant_2") }

# Messageable group containing `current` and `participant`.
let(:group) do
  group = Fabricate(:group, has_messages: true)
  group.add(current)
  group.add(participant)
  group
end
# Builds a private-message topic for the examples in this context.
#
# The first entry of `users` starts the PM. Every other entry is invited by
# that first user and then posts once in the topic. When `group` is given,
# the group is invited as well and each of its members posts once.
#
# @param users [Array] users taking part; the first one is the initiator
# @param group [Object, nil] optional group to invite to the PM
# @return the reloaded PM topic
def create_pm(users:, group: nil)
  initiator, *invitees = users
  pm = Fabricate(:private_message_post_one_user, user: initiator).topic

  invitees.each do |invitee|
    pm.invite(initiator, invitee.username)
    Fabricate(:post, user: invitee, topic: pm)
  end

  if group
    pm.invite_group(initiator, group)
    group.users.each do |member|
      Fabricate(:post, user: member, topic: pm)
    end
  end

  pm.reload
end
# `in:personal-direct` returns the PMs `current` takes part in, newest
# topic first, and ignores the PM between the two other users.
it 'can find all direct PMs of the current user' do
  pm = create_pm(users: [current, participant])
  _pm_2 = create_pm(users: [participant_2, participant])
  pm_3 = create_pm(users: [participant, current])
  pm_4 = create_pm(users: [participant_2, current])

  # The filter keyword is exercised in lower and mixed case.
  ["in:personal-direct", "In:PeRsOnAl-DiReCt"].each do |query|
    results = Search.execute(query, guardian: Guardian.new(current))
    expect(results.posts.size).to eq(3)
    expect(results.posts.map(&:topic_id)).to eq([pm_4.id, pm_3.id, pm.id])
  end
end
# Combining `@username` with `in:personal-direct` narrows the direct PMs
# down to posts written by that user.
it 'can filter direct PMs by @username' do
  pm = create_pm(users: [current, participant])
  pm_2 = create_pm(users: [participant, current])
  _pm_3 = create_pm(users: [participant_2, current])

  [
    "@#{participant.username} in:personal-direct",
    "@#{participant.username} iN:pErSoNaL-dIrEcT"
  ].each do |query|
    results = Search.execute(query, guardian: Guardian.new(current))
    expect(results.posts.size).to eq(2)
    expect(results.posts.map(&:topic_id)).to eq([pm_2.id, pm.id])
    expect(results.posts.map(&:user_id).uniq).to eq([participant.id])
  end
end
# A PM with three participants is not a "direct" PM.
it "doesn't include PMs that have more than 2 participants" do
  _pm = create_pm(users: [current, participant, participant_2])

  results = Search.execute("@#{participant.username} in:personal-direct", guardian: Guardian.new(current))
  expect(results.posts.size).to eq(0)
end
# A PM a group was invited to is not a "direct" PM either.
it "doesn't include PMs that have groups" do
  _pm = create_pm(users: [current, participant], group: group)

  results = Search.execute("@#{participant.username} in:personal-direct", guardian: Guardian.new(current))
  expect(results.posts.size).to eq(0)
end
end
2020-01-28 18:11:33 +08:00
# `in:all` searches public topics and the searcher's own private messages
# together; these examples check who gets to see which posts.
context 'all topics' do
  let!(:u1) { Fabricate(:user, username: 'fred', name: 'bob jones', email: 'foo+1@bar.baz') }
  let!(:u2) { Fabricate(:user, username: 'bob', name: 'fred jones', email: 'foo+2@bar.baz') }
  let!(:u3) { Fabricate(:user, username: 'jones', name: 'bob fred', email: 'foo+3@bar.baz') }
  let!(:u4) { Fabricate(:user, username: 'alice', name: 'bob fred', email: 'foo+4@bar.baz', admin: true) }

  let!(:public_topic) { Fabricate(:topic, user: u1) }
  let!(:public_post1) { Fabricate(:post, topic: public_topic, raw: "what do you want for breakfast? ham and eggs?", user: u1) }
  let!(:public_post2) { Fabricate(:post, topic: public_topic, raw: "ham and spam", user: u2) }

  let!(:private_topic) { Fabricate(:topic, user: u1, category_id: nil, archetype: 'private_message') }
  let!(:private_post1) { Fabricate(:post, topic: private_topic, raw: "what do you want for lunch? ham and cheese?", user: u1) }
  let!(:private_post2) { Fabricate(:post, topic: private_topic, raw: "cheese and spam", user: u2) }

  it 'finds private messages' do
    # u1 and u2 are the only users allowed on the private topic.
    TopicAllowedUser.create!(user_id: u1.id, topic_id: private_topic.id)
    TopicAllowedUser.create!(user_id: u2.id, topic_id: private_topic.id)

    # case insensitive only
    results = Search.execute('iN:aLL cheese', guardian: Guardian.new(u1))
    expect(results.posts).to contain_exactly(private_post1)

    # private only
    results = Search.execute('in:all cheese', guardian: Guardian.new(u1))
    expect(results.posts).to contain_exactly(private_post1)

    # public only
    results = Search.execute('in:all eggs', guardian: Guardian.new(u1))
    expect(results.posts).to contain_exactly(public_post1)

    # both
    results = Search.execute('in:all spam', guardian: Guardian.new(u1))
    expect(results.posts).to contain_exactly(public_post2, private_post2)

    # for anon
    results = Search.execute('in:all spam', guardian: Guardian.new)
    expect(results.posts).to contain_exactly(public_post2)

    # nonparticipatory user
    results = Search.execute('in:all cheese', guardian: Guardian.new(u3))
    expect(results.posts.empty?).to eq(true)

    results = Search.execute('in:all eggs', guardian: Guardian.new(u3))
    expect(results.posts).to contain_exactly(public_post1)

    results = Search.execute('in:all spam', guardian: Guardian.new(u3))
    expect(results.posts).to contain_exactly(public_post2)

    # Admin doesn't see private topic
    results = Search.execute('in:all spam', guardian: Guardian.new(u4))
    expect(results.posts).to contain_exactly(public_post2)

    # same keyword for different users
    results = Search.execute('in:all ham', guardian: Guardian.new(u1))
    expect(results.posts).to contain_exactly(public_post1, private_post1)

    results = Search.execute('in:all ham', guardian: Guardian.new(u2))
    expect(results.posts).to contain_exactly(public_post1, private_post1)

    results = Search.execute('in:all ham', guardian: Guardian.new(u3))
    expect(results.posts).to contain_exactly(public_post1)
  end
end
2014-12-04 10:46:52 +08:00
end
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` of
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
context 'posts' do
2020-07-27 11:55:54 +08:00
# Shared post for this context. The indexer is switched on inside the
# fabrication block itself, before the post is created.
# NOTE(review): presumably because fab! records are built outside the
# per-example `before` hook - confirm against the fab! helper.
fab!(:post) do
  SearchIndexer.enable

  Fabricate(:post)
end
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` of
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
# Topic under test; taken from `post`, which is defined outside this excerpt.
let ( :topic ) { post . topic }
# Eagerly created reply in the same topic, authored by the topic's user.
# It is fabricated from the :post_with_long_raw_content fabricator and then
# updated so that its raw text ends with the word "elephant", which the
# examples below search for.
let! ( :reply ) do
Fabricate ( :post_with_long_raw_content ,
topic : topic ,
user : topic . user ,
) . tap { | post | post . update! ( raw : " #{ post . raw } elephant " ) }
end
let ( :expected_blurb ) do
2020-08-12 15:33:26 +08:00
" #{ Search :: GroupedSearchResults :: OMISSION } hundred characters to satisfy any test conditions that require content longer than the typical test post raw content. It really is some long content, folks. <span class= \" #{ Search :: HIGHLIGHT_CSS_CLASS } \" >elephant</span> "
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
end
it 'returns the post' do
2020-08-06 14:15:31 +08:00
SiteSetting . use_pg_headlines_for_excerpt = true
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
result = Search . execute ( 'elephant' ,
type_filter : 'topic' ,
include_blurbs : true
)
2020-08-06 14:15:31 +08:00
expect ( result . posts . map ( & :id ) ) . to contain_exactly ( reply . id )
2020-08-07 12:43:09 +08:00
post = result . posts . first
expect ( result . blurb ( post ) ) . to eq ( expected_blurb )
expect ( post . topic_title_headline ) . to eq ( topic . fancy_title )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
end
2020-08-12 15:33:26 +08:00
it " only applies highlighting to the first #{ Search :: MAX_LENGTH_FOR_HEADLINE } characters " do
PERF: Limit characters used to generate headline for search blurb.
We determined using the following benchmark script that limiting to 2500 chars would mean a maximum of
25ms spent generating headlines.
```
require 'benchmark/ips'
string = <<~STRING
Far far away, behind the word mountains...
STRING
def sql_excerpt(string, l = 1000000)
DB.query_single(<<~SQL)
SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains'))
SQL
end
def ruby_excerpt(string)
output = DB.query_single("SELECT '#{string}'")[0]
Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100)
end
puts "Ruby Excerpt: #{ruby_excerpt(string)}"
puts "SQL Excerpt: #{sql_excerpt(string)}"
puts
Benchmark.ips do |x|
x.time = 10
[1000, 2500, 5000, 10000, 20000, 50000].each do |l|
short_string = string[0..l]
x.report("ts_headline excerpt #{l}") do
sql_excerpt(short_string, l)
end
x.report("actionview excerpt #{l}") do
ruby_excerpt(short_string)
end
end
x.compare!
end
```
```
actionview excerpt 1000: 20570.7 i/s
actionview excerpt 2500: 17863.1 i/s - 1.15x (± 0.00) slower
actionview excerpt 5000: 14228.9 i/s - 1.45x (± 0.00) slower
actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower
actionview excerpt 20000: 6255.0 i/s - 3.29x (± 0.00) slower
ts_headline excerpt 1000: 4337.5 i/s - 4.74x (± 0.00) slower
actionview excerpt 50000: 3222.7 i/s - 6.38x (± 0.00) slower
ts_headline excerpt 2500: 2240.4 i/s - 9.18x (± 0.00) slower
ts_headline excerpt 5000: 1258.7 i/s - 16.34x (± 0.00) slower
ts_headline excerpt 10000: 667.2 i/s - 30.83x (± 0.00) slower
ts_headline excerpt 20000: 348.7 i/s - 58.98x (± 0.00) slower
ts_headline excerpt 50000: 131.9 i/s - 155.91x (± 0.00) slower
```
2020-08-07 14:36:12 +08:00
SiteSetting . use_pg_headlines_for_excerpt = true
reply . update! ( raw : " #{ 'a' * Search :: MAX_LENGTH_FOR_HEADLINE } #{ reply . raw } " )
result = Search . execute ( 'elephant' )
expect ( result . posts . map ( & :id ) ) . to contain_exactly ( reply . id )
post = result . posts . first
expect ( post . headline . include? ( 'elephant' ) ) . to eq ( false )
end
2020-08-12 15:33:26 +08:00
# With PG headlines enabled, a reply whose raw text is BLURB_LENGTH "a"
# characters followed by "elephant" is still the only search hit, but its
# blurb is expected to be just the run of "a"s followed by the OMISSION
# marker, i.e. the matched word falls outside the blurb.
it " limits the search headline to #{ Search :: GroupedSearchResults :: BLURB_LENGTH } characters " do
SiteSetting . use_pg_headlines_for_excerpt = true
# Push the search term past the first BLURB_LENGTH characters of the post.
reply . update! ( raw : " #{ 'a' * Search :: GroupedSearchResults :: BLURB_LENGTH } elephant " )
result = Search . execute ( 'elephant' )
expect ( result . posts . map ( & :id ) ) . to contain_exactly ( reply . id )
post = result . posts . first
expect ( result . blurb ( post ) ) . to eq ( " #{ 'a' * Search :: GroupedSearchResults :: BLURB_LENGTH } #{ Search :: GroupedSearchResults :: OMISSION } " )
end
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
it 'returns the right post and blurb for searches with phrase' do
2020-08-06 14:15:31 +08:00
SiteSetting . use_pg_headlines_for_excerpt = true
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
result = Search . execute ( '"elephant"' ,
type_filter : 'topic' ,
include_blurbs : true
)
2020-08-06 14:15:31 +08:00
expect ( result . posts . map ( & :id ) ) . to contain_exactly ( reply . id )
expect ( result . blurb ( result . posts . first ) ) . to eq ( expected_blurb )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
end
it 'applies a small penalty to closed topic when ranking' do
  # Two posts with the same raw text are placed in separate topics; the only
  # difference set up here is that the first topic is created closed.
  closed_topic = Fabricate(:topic, title: " A topic that will be closed ", closed: true)
  closed_post = Fabricate(:post, raw: " My weekly update ", topic: closed_topic)

  open_topic = Fabricate(:topic, title: " A topic that will be open ")
  open_post = Fabricate(:post, raw: " My weekly update ", topic: open_topic)

  ranked_ids = Search.execute('weekly update').posts.pluck(:id)

  # The post in the open topic is expected first, the closed topic's post second.
  expect(ranked_ids).to eq([open_post.id, closed_post.id])
end
it 'aggregates searches in a topic by returning the post with the highest rank' do
post = Fabricate ( :post , topic : topic , raw : " this is a play post " )
FIX: Search by relevance may return incorrect post number.
Follow up to d8c796bc4.
Note that this change increases query time by around 40% in the following
benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance
search is accurate.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", 
"posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
18.040 (± 0.0%) i/s - 181.000 in 10.035241s
current aggregate search query with proper ranking
12.992 (± 0.0%) i/s - 130.000 in 10.007214s
Comparison:
current aggregate search query: 18.0 i/s
current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower
```
2020-07-15 11:41:45 +08:00
post2 = Fabricate ( :post , topic : topic , raw : " play play playing played play " )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
post3 = Fabricate ( :post , raw : " this is a play post " )
FIX: Search by relevance may return incorrect post number.
Follow up to d8c796bc4.
Note that this change increases query time by around 40% in the following
benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance
search is accurate.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", 
"posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
18.040 (± 0.0%) i/s - 181.000 in 10.035241s
current aggregate search query with proper ranking
12.992 (± 0.0%) i/s - 130.000 in 10.007214s
Comparison:
current aggregate search query: 18.0 i/s
current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower
```
2020-07-15 11:41:45 +08:00
5 . times do
Fabricate ( :post , topic : topic , raw : " play playing played " )
end
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
results = Search . execute ( 'play' )
expect ( results . posts . map ( & :id ) ) . to eq ( [
post2 . id ,
post3 . id
] )
end
2020-08-21 16:16:28 +08:00
it " is able to search with an offset when configured " do
post_1 = Fabricate ( :post , raw : " this is a play post " )
SiteSetting . search_recent_regular_posts_offset_post_id = post_1 . id + 1
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
results = Search . execute ( 'play post' )
2020-08-21 16:16:28 +08:00
expect ( results . posts ) . to eq ( [ post_1 ] )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
2020-08-21 16:16:28 +08:00
post_2 = Fabricate ( :post , raw : " this is another play post " )
SiteSetting . search_recent_regular_posts_offset_post_id = post_2 . id
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
results = Search . execute ( 'play post' )
expect ( results . posts . map ( & :id ) ) . to eq ( [
2020-08-21 16:16:28 +08:00
post_2 . id ,
post_1 . id
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
] )
end
2020-07-27 11:55:54 +08:00
# Turns `post` into a whisper, then checks that a search without a guardian
# returns nothing while the same search with an admin guardian returns it.
it 'allows staff to search for whispers' do
  whisper_type = Post.types[:whisper]
  post.update!(post_type: whisper_type, raw: 'this is a tiger')

  default_search = Search.execute('tiger')
  expect(default_search.posts).to eq([])

  staff_guardian = Guardian.new(admin)
  staff_search = Search.execute('tiger', guardian: staff_guardian)
  expect(staff_search.posts).to eq([post])
end
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
end
2013-02-06 03:16:51 +08:00
context 'topics' do
2014-09-02 17:15:08 +08:00
# Lazily fabricated post, and the topic that post belongs to.
let ( :post ) { Fabricate ( :post ) }
let ( :topic ) { post . topic }
2014-02-17 10:54:51 +08:00
context 'search within topic' do
2020-03-11 05:13:17 +08:00
# Fabricates a post with the given `raw` text, authored by the topic's user.
# When `topic` is nil a new topic is fabricated first; `created_at` is passed
# straight through to the post fabricator (nil by default).
def new_post ( raw , topic = nil , created_at : nil )
2018-11-07 06:41:55 +08:00
topic || = Fabricate ( :topic )
2020-03-11 05:13:17 +08:00
Fabricate ( :post , topic : topic , topic_id : topic . id , user : topic . user , raw : raw , created_at : created_at )
2014-02-17 10:54:51 +08:00
end
2018-11-07 06:41:55 +08:00
# Switches on the `search_tokenize_chinese_japanese_korean` site setting and
# checks that a partial CJK phrase, searched within the post's own topic,
# returns that post.
it 'works in Chinese' do
  SiteSetting.search_tokenize_chinese_japanese_korean = true

  cjk_post = new_post('I am not in English 何点になると思いますか')
  found = Search.execute('何点になると思', search_context: cjk_post.topic)

  expect(found.posts.map(&:id)).to eq([cjk_post.id])
end
2014-02-17 10:54:51 +08:00
# Runs several searches scoped to `topic` via `search_context` and checks
# which of its four posts come back, and in what order.
it 'displays multiple results within a topic' do
topic = Fabricate ( :topic )
topic2 = Fabricate ( :topic )
2020-03-11 05:13:17 +08:00
# posts in a second topic that also contain the word "posting"; none of the
# expectations below include them
new_post ( 'this is the other post I am posting' , topic2 , created_at : 6 . minutes . ago )
new_post ( 'this is my fifth post I am posting' , topic2 , created_at : 5 . minutes . ago )
2014-06-20 13:48:34 +08:00
2020-03-11 05:13:17 +08:00
# four posts in the searched topic, created oldest (post1) to newest (post4)
post1 = new_post ( 'this is the other post I am posting' , topic , created_at : 4 . minutes . ago )
post2 = new_post ( 'this is my first post I am posting' , topic , created_at : 3 . minutes . ago )
post3 = new_post ( 'this is a real long and complicated bla this is my second post I am Posting birds with more stuff bla bla' , topic , created_at : 2 . minutes . ago )
post4 = new_post ( 'this is my fourth post I am posting' , topic , created_at : 1 . minute . ago )
2014-02-17 10:54:51 +08:00
2014-02-19 05:59:18 +08:00
# update posts_count
topic . reload
2014-09-02 17:15:08 +08:00
# plain term: results are expected oldest post first
results = Search . execute ( 'posting' , search_context : post1 . topic )
2015-04-25 23:18:35 +08:00
expect ( results . posts . map ( & :id ) ) . to eq ( [ post1 . id , post2 . id , post3 . id , post4 . id ] )
2014-02-17 10:54:51 +08:00
2017-05-24 23:24:41 +08:00
# with the extra "l" term the expected order is reversed (post4 first)
# NOTE(review): presumably "l" is a latest-first ordering shortcut — confirm against Search
results = Search . execute ( 'posting l' , search_context : post1 . topic )
expect ( results . posts . map ( & :id ) ) . to eq ( [ post4 . id , post3 . id , post2 . id , post1 . id ] )
2014-06-20 13:48:34 +08:00
# stop words should work
2014-09-02 17:15:08 +08:00
results = Search . execute ( 'this' , search_context : post1 . topic )
2015-04-25 23:18:35 +08:00
expect ( results . posts . length ) . to eq ( 4 )
2016-07-25 13:06:25 +08:00
# phrase search works as expected
results = Search . execute ( '"fourth post I am posting"' , search_context : post1 . topic )
expect ( results . posts . length ) . to eq ( 1 )
2014-02-17 10:54:51 +08:00
end
2018-05-07 17:43:55 +08:00
# A search scoped to a topic with `visible: false` (through `search_context`)
# is still expected to return the one post created inside it.
it " works for unlisted topics " do
2019-04-29 15:32:25 +08:00
# NOTE(review): non-bang `update` returns false instead of raising when the
# save fails — confirm that is intended, or switch to `update!`.
topic . update ( visible : false )
2018-05-08 13:59:03 +08:00
_post = new_post ( 'discourse is awesome' , topic )
2018-05-07 17:43:55 +08:00
results = Search . execute ( 'discourse' , search_context : topic )
expect ( results . posts . length ) . to eq ( 1 )
end
2014-02-17 10:54:51 +08:00
end
2013-02-26 00:42:20 +08:00
# Searches for 'hundred' with `type_filter: 'topic'` and expects exactly one
# hit: the eagerly fabricated long-content post.
context 'searching the OP' do
2014-09-02 17:15:08 +08:00
# `let!` so the post exists before `result` runs the search
# NOTE(review): presumably the :post_with_long_raw_content fabricator's text
# contains the word "hundred" — confirm against the fabricator.
let! ( :post ) { Fabricate ( :post_with_long_raw_content ) }
2020-07-14 11:05:57 +08:00
let ( :result ) { Search . execute ( 'hundred' , type_filter : 'topic' ) }
2013-02-06 03:16:51 +08:00
2013-05-13 08:48:32 +08:00
it 'returns a result correctly' do
2015-04-25 23:18:35 +08:00
expect ( result . posts . length ) . to eq ( 1 )
expect ( result . posts [ 0 ] . id ) . to eq ( post . id )
2013-02-06 03:16:51 +08:00
end
2013-05-14 05:04:41 +08:00
end
2018-05-08 13:59:03 +08:00
context 'searching for quoted title' do
  # The quoted phrase appears in the created post's title rather than in its
  # raw body; the search is still expected to return one post.
  it " can find quoted title " do
    create_post(raw: " this is the raw body ", title: " I am a title yeah ")

    quoted_search = Search.execute('"a title yeah"')
    hit_count = quoted_search.posts.length

    expect(hit_count).to eq(1)
  end
end
2014-08-29 03:42:29 +08:00
# Passes the topic's numeric id as the search term with `search_for_id: true`
# and expects the single result to be `post`.
context " search for a topic by id " do
2014-09-02 17:15:08 +08:00
# min_search_term_length is lowered to 1 for this search
let ( :result ) { Search . execute ( topic . id , type_filter : 'topic' , search_for_id : true , min_search_term_length : 1 ) }
2014-08-29 03:42:29 +08:00
it 'returns the topic' do
2015-04-25 23:18:35 +08:00
expect ( result . posts . length ) . to eq ( 1 )
expect ( result . posts . first . id ) . to eq ( post . id )
2014-08-29 03:42:29 +08:00
end
end
2013-05-14 05:04:41 +08:00
# Uses a topic's relative URL as the search term (with `search_for_id: true`)
# and checks which posts come back, with and without `restrict_to_archetype`.
context " search for a topic by url " do
it 'returns the topic' do
2019-01-14 17:30:45 +08:00
result = Search . execute ( topic . relative_url , search_for_id : true , type_filter : 'topic' )
2015-04-25 23:18:35 +08:00
expect ( result . posts . length ) . to eq ( 1 )
expect ( result . posts . first . id ) . to eq ( post . id )
2013-05-14 05:04:41 +08:00
end
2019-01-14 17:30:45 +08:00
# Each example searches by both a regular topic URL and a private message
# URL; only the one matching the requested archetype should yield a post.
context 'restrict_to_archetype' do
let ( :personal_message ) { Fabricate ( :private_message_topic ) }
let! ( :p1 ) { Fabricate ( :post , topic : personal_message , post_number : 1 ) }
it 'restricts result to topics' do
result = Search . execute ( personal_message . relative_url , search_for_id : true , type_filter : 'topic' , restrict_to_archetype : Archetype . default )
expect ( result . posts . length ) . to eq ( 0 )
result = Search . execute ( topic . relative_url , search_for_id : true , type_filter : 'topic' , restrict_to_archetype : Archetype . default )
expect ( result . posts . length ) . to eq ( 1 )
end
# both searches here run with an admin guardian
it 'restricts result to messages' do
2019-05-06 16:52:31 +08:00
result = Search . execute ( topic . relative_url , search_for_id : true , type_filter : 'private_messages' , guardian : Guardian . new ( admin ) , restrict_to_archetype : Archetype . private_message )
2019-01-14 17:30:45 +08:00
expect ( result . posts . length ) . to eq ( 0 )
2019-05-06 16:52:31 +08:00
result = Search . execute ( personal_message . relative_url , search_for_id : true , type_filter : 'private_messages' , guardian : Guardian . new ( admin ) , restrict_to_archetype : Archetype . private_message )
2019-01-14 17:30:45 +08:00
expect ( result . posts . length ) . to eq ( 1 )
end
end
2013-05-13 08:48:32 +08:00
end
# Checks that posts in a category whose permissions are set to
# `staff: :full` are only returned to an admin searcher.
context 'security' do
2013-06-29 09:22:17 +08:00
2013-05-13 08:48:32 +08:00
# Runs the 'hello' search with a Guardian built for `current_user` (may be nil).
# NOTE(review): presumably the default post fabricator's text contains
# "hello" — confirm against the fabricator.
def result ( current_user )
2014-10-18 12:19:08 +08:00
Search . execute ( 'hello' , guardian : Guardian . new ( current_user ) )
2013-02-06 03:16:51 +08:00
end
2013-05-13 08:48:32 +08:00
it 'secures results correctly' do
2019-08-06 18:26:54 +08:00
category = Fabricate ( :category_with_definition )
2013-05-13 08:48:32 +08:00
# move the topic into the new category before its permissions are set
topic . category_id = category . id
topic . save
2013-07-14 09:24:16 +08:00
category . set_permissions ( staff : :full )
2013-05-13 08:48:32 +08:00
category . save
2015-04-25 23:18:35 +08:00
# no user and a freshly fabricated user get no posts; the admin does
expect ( result ( nil ) . posts ) . not_to be_present
expect ( result ( Fabricate ( :user ) ) . posts ) . not_to be_present
2019-05-06 16:52:31 +08:00
expect ( result ( admin ) . posts ) . to be_present
2013-05-13 08:48:32 +08:00
2013-02-26 00:42:20 +08:00
end
2013-02-06 03:16:51 +08:00
end
end
2013-03-01 03:52:35 +08:00
# Fabricates a topic with a Cyrillic title plus a post in it, then checks
# that a search for one word of that title returns posts.
context 'cyrillic topic' do
2020-07-07 15:45:57 +08:00
let! ( :cyrillic_topic ) {
Fabricate ( :topic ) do
user
# sequenced title of the form "Тестовая запись <i>"
title { sequence ( :title ) { | i | " Тестовая запись #{ i } " } }
end
2013-03-01 03:52:35 +08:00
}
2020-07-07 15:45:57 +08:00
2013-03-01 03:52:35 +08:00
let! ( :post ) { Fabricate ( :post , topic : cyrillic_topic , user : cyrillic_topic . user ) }
2014-09-02 17:15:08 +08:00
let ( :result ) { Search . execute ( 'запись' ) }
2013-03-01 03:52:35 +08:00
it 'finds something when given cyrillic query' do
2015-04-25 23:18:35 +08:00
expect ( result . posts ) . to be_present
2013-03-01 03:52:35 +08:00
end
end
2016-10-07 09:40:57 +08:00
# The fabricated post's raw text contains the word "canned"; searching for
# that exact word is expected to return posts.
it 'does not tokenize search term' do
  Fabricate(:post, raw: 'thing is canned should still be found!')

  canned_search = Search.execute('canned')

  expect(canned_search.posts).to be_present
end
2013-02-06 03:16:51 +08:00
context 'categories' do
2019-08-06 18:26:54 +08:00
# A category whose name contains "monkey", a topic in it, and an eagerly
# created post in that topic whose raw text is "snow monkey".
let ( :category ) { Fabricate ( :category_with_definition , name : " monkey Category 2 " ) }
2019-03-18 15:25:45 +08:00
let ( :topic ) { Fabricate ( :topic , category : category ) }
let! ( :post ) { Fabricate ( :post , topic : topic , raw : " snow monkey " ) }
2013-02-06 03:16:51 +08:00
2019-03-18 15:25:45 +08:00
# A second "monkey" category with slug "test" whose search priority is set
# to the :ignore value from Searchable::PRIORITIES.
let! ( :ignored_category ) do
2019-08-06 18:26:54 +08:00
Fabricate ( :category_with_definition ,
2019-03-18 15:25:45 +08:00
name : " monkey Category 1 " ,
2019-03-19 11:23:14 +08:00
slug : " test " ,
2019-03-18 15:25:45 +08:00
search_priority : Searchable :: PRIORITIES [ :ignore ]
)
end
# A "monkey" search is expected to list both categories, but to return posts
# only from `category`; the ignored category's first post is expected only
# when the "#test" term is added.
it " should return the right categories " do
search = Search . execute ( " monkey " )
2020-07-16 09:28:45 +08:00
expect ( search . categories ) . to contain_exactly (
category , ignored_category
2019-03-18 15:25:45 +08:00
)
2020-07-14 13:36:09 +08:00
expect ( search . posts ) . to eq ( [ category . topic . first_post , post ] )
2019-03-19 11:23:14 +08:00
# NOTE(review): presumably "#test" filters by the slug given to
# `ignored_category` — confirm against Search's filter syntax.
search = Search . execute ( " monkey # test " )
2020-07-14 13:36:09 +08:00
expect ( search . posts ) . to eq ( [ ignored_category . topic . first_post ] )
2013-05-13 16:04:03 +08:00
end
2013-02-06 03:16:51 +08:00
2019-03-18 15:25:45 +08:00
# Adds a child of `ignored_category` (with its own post) and checks the
# categories and post ordering returned for "monkey" and "snow", before and
# after `category`'s permissions are replaced with an empty set.
describe " with child categories " do
let! ( :child_of_ignored_category ) do
2019-08-06 18:26:54 +08:00
Fabricate ( :category_with_definition ,
2019-03-18 15:25:45 +08:00
name : " monkey Category 3 " ,
parent_category : ignored_category
)
end
# post in a fresh topic under the child category
let! ( :post2 ) do
Fabricate ( :post ,
topic : Fabricate ( :topic , category : child_of_ignored_category ) ,
raw : " snow monkey park "
)
end
it 'returns the right results' do
search = Search . execute ( " monkey " )
expect ( search . categories ) . to contain_exactly (
category , ignored_category , child_of_ignored_category
)
2020-07-14 13:36:09 +08:00
# exact order is asserted; posts are compared by id
expect ( search . posts . map ( & :id ) ) . to eq ( [
child_of_ignored_category . topic . first_post ,
2019-03-18 15:25:45 +08:00
category . topic . first_post ,
2020-07-15 13:25:15 +08:00
post2 ,
post
2020-07-14 13:36:09 +08:00
] . map ( & :id ) )
2019-03-18 15:25:45 +08:00
search = Search . execute ( " snow " )
2020-07-15 13:25:15 +08:00
expect ( search . posts . map ( & :id ) ) . to eq ( [ post2 . id , post . id ] )
2013-05-13 16:04:03 +08:00
2019-03-18 15:25:45 +08:00
# after `category` gets empty permissions, neither it nor its posts are
# expected in the results below
category . set_permissions ( { } )
2020-07-16 09:28:45 +08:00
category . save!
2019-03-18 15:25:45 +08:00
search = Search . execute ( " monkey " )
2020-07-16 09:28:45 +08:00
expect ( search . categories ) . to contain_exactly (
2019-03-18 15:25:45 +08:00
ignored_category , child_of_ignored_category
2020-07-16 09:28:45 +08:00
)
2013-05-13 16:04:03 +08:00
2020-07-14 13:36:09 +08:00
expect ( search . posts . map ( & :id ) ) . to eq ( [
2019-03-18 15:25:45 +08:00
child_of_ignored_category . topic . first_post ,
post2
2020-07-14 13:36:09 +08:00
] . map ( & :id ) )
2019-03-18 15:25:45 +08:00
end
2013-02-26 00:42:20 +08:00
end
2013-02-06 03:16:51 +08:00
2019-03-25 10:59:55 +08:00
# Checks that the order of post results follows the search priority of the
# category each post's topic belongs to.
describe 'categories with different priorities' do
  let(:category2) { Fabricate(:category_with_definition) }

  it " should return posts in the right order " do
    # Both posts share the same raw text and differ only in the category
    # of their topic.
    raw = " The pure genuine evian "
    post = Fabricate(:post, topic: category.topic, raw: raw)
    post2 = Fabricate(:post, topic: category2.topic, raw: raw)

    # Bump post2's topic into the future so it is the most recently bumped.
    # NOTE(review): presumably bumped_at is the tie-breaker when ranks are
    # equal, which is why post2 is expected first below - confirm in Search.
    post2.topic.update!(bumped_at: 10.seconds.from_now)

    search = Search.execute(raw)

    expect(search.posts.map(&:id)).to eq([post2.id, post.id])

    # Raising the priority of `category` must move its post ahead of post2.
    category.update!(search_priority: Searchable::PRIORITIES[:high])

    search = Search.execute(raw)

    expect(search.posts.map(&:id)).to eq([post.id, post2.id])
  end
end
2013-02-06 03:16:51 +08:00
end
2019-03-04 17:30:09 +08:00
# Group results: a group is found by name, subject to its visibility level.
context 'groups' do
  # Searches for the current `group` by name on behalf of `user`.
  # A freshly fabricated regular user is used when none is given.
  def search(user = Fabricate(:user))
    Search.execute(group.name, guardian: Guardian.new(user))
  end

  let!(:group) { Group[:trust_level_0] }

  it 'shows group' do
    expect(search.groups.map(&:name)).to eq([group.name])
  end

  context 'group visibility' do
    let!(:group) { Fabricate(:group) }

    before do
      # NOTE(review): 3 is presumably the staff-only visibility level
      # (the nested contexts expect admins to see the group and regular
      # users not to) - confirm against Group.visibility_levels.
      group.update!(visibility_level: 3)
    end

    context 'staff logged in' do
      it 'shows group' do
        expect(search(admin).groups.map(&:name)).to eq([group.name])
      end
    end

    context 'non staff logged in' do
      it "doesn't show group" do
        expect(search.groups.map(&:name)).to be_empty
      end
    end
  end
end
2017-08-25 23:52:18 +08:00
# Tag results, and post lookups through tags and tag synonyms.
context 'tags' do
  let!(:tag) { Fabricate(:tag) }
  let!(:uppercase_tag) { Fabricate(:tag, name: " HeLlO ") }
  let(:tag_group) { Fabricate(:tag_group) }
  let(:category) { Fabricate(:category_with_definition) }

  # Search for the fabricated tag's name with default options.
  def search
    Search.execute(tag.name)
  end

  context 'post searching' do
    let(:post) { Fabricate(:post, raw: 'I am special post') }

    before do
      SiteSetting.tagging_enabled = true

      # Attach both tags to the post's topic.
      tagged_topic = post.topic
      DiscourseTagging.tag_topic_by_names(
        tagged_topic,
        Guardian.new(Fabricate.build(:admin)),
        [tag.name, uppercase_tag.name]
      )
      tagged_topic.save
    end

    it 'can find posts with tags' do
      # indexing is deferred, so it has to be triggered explicitly here
      Jobs::ReindexSearch.new.rebuild_problem_posts

      by_tag_name = Search.execute(tag.name)
      expect(by_tag_name.posts.length).to eq(1)

      # same letters as `uppercase_tag`, different casing
      by_other_casing = Search.execute(" hElLo ")
      expect(by_other_casing.posts.length).to eq(1)

      SiteSetting.tagging_enabled = false

      with_tagging_off = Search.execute(tag.name)
      expect(with_tagging_off.posts.length).to eq(0)
    end

    it 'can find posts with tag synonyms' do
      synonym = Fabricate(:tag, name: 'synonym', target_tag: tag)
      Jobs::ReindexSearch.new.rebuild_problem_posts

      by_synonym = Search.execute(synonym.name)
      expect(by_synonym.posts.length).to eq(1)
    end
  end

  context 'tagging is disabled' do
    before do
      SiteSetting.tagging_enabled = false
    end

    it 'does not include tags' do
      expect(search.tags).to_not be_present
    end
  end

  context 'tagging is enabled' do
    before do
      SiteSetting.tagging_enabled = true
    end

    it 'returns the tag in the result' do
      expect(search.tags).to eq([tag])
    end

    it 'shows staff tags' do
      create_staff_tags([" #{ tag . name } 9 "])

      # Same expectation with an admin guardian and with default options.
      admin_results = Search.execute(tag.name, guardian: Guardian.new(admin))
      expect(admin_results.tags.map(&:name)).to eq([tag.name, " #{ tag . name } 9 "])

      default_results = search
      expect(default_results.tags.map(&:name)).to eq([tag.name, " #{ tag . name } 9 "])
    end

    it 'includes category-restricted tags' do
      category_tag = Fabricate(:tag, name: " #{ tag . name } 9 ")
      tag_group.tags = [category_tag]

      # Grant admins full permission on the category and allow the tag group in it.
      category.set_permissions(admins: :full)
      category.allowed_tag_groups = [tag_group.name]
      category.save!

      admin_results = Search.execute(tag.name, guardian: Guardian.new(admin))
      expect(admin_results.tags).to eq([tag, category_tag])

      default_results = search
      expect(default_results.tags).to eq([tag, category_tag])
    end
  end
end
2013-02-06 03:16:51 +08:00
# `type_filter` limits a search to a single kind of result.
context 'type_filter' do
  # A user and a category that both match the term 'amazing'.
  let!(:user) { Fabricate(:user, username: 'amazing', email: 'amazing@amazing.com') }
  let!(:category) { Fabricate(:category_with_definition, name: 'amazing category', user: user) }

  # Number of hits per result kind, in [categories, posts, users] order.
  def hit_counts(search_results)
    [
      search_results.categories.length,
      search_results.posts.length,
      search_results.users.length
    ]
  end

  context 'user filter' do
    let(:results) { Search.execute('amazing', type_filter: 'user') }

    it " returns a user result " do
      expect(hit_counts(results)).to eq([0, 0, 1])
    end
  end

  context 'category filter' do
    let(:results) { Search.execute('amazing', type_filter: 'category') }

    it " returns a category result " do
      expect(hit_counts(results)).to eq([1, 0, 0])
    end
  end
end
# `search_context` scopes a search to a user, a category or a tag.
context 'search_context' do
  it 'can find a user when using search context' do
    coding_horror = Fabricate(:coding_horror)
    post = Fabricate(:post)

    # A second post by a different user, which must not be returned.
    Fabricate(:post, user: coding_horror)

    result = Search.execute('hello', search_context: post.user)

    expect(result.posts.length).to eq(1)
    # This used to be an assignment (`topic_id = ...`), so it verified
    # nothing and mutated the result; assert the topic id instead.
    expect(result.posts.first.topic_id).to eq(post.topic_id)
  end

  it 'can use category as a search context' do
    category = Fabricate(:category_with_definition,
      search_priority: Searchable::PRIORITIES[:ignore]
    )

    topic = Fabricate(:topic, category: category)
    topic_no_cat = Fabricate(:topic)

    # includes subcategory in search
    subcategory = Fabricate(:category_with_definition, parent_category_id: category.id)
    sub_topic = Fabricate(:topic, category: subcategory)

    post = Fabricate(:post, topic: topic, user: topic.user)
    # Post outside the category tree, expected to be excluded.
    Fabricate(:post, topic: topic_no_cat, user: topic.user)
    sub_post = Fabricate(:post, raw: 'I am saying hello from a subcategory', topic: sub_topic, user: topic.user)

    search = Search.execute('hello', search_context: category)

    expect(search.posts.map(&:id)).to match_array([post.id, sub_post.id])
    expect(search.posts.length).to eq(2)
  end

  it 'can use tag as a search context' do
    tag = Fabricate(:tag, name: 'important-stuff')

    topic = Fabricate(:topic)
    topic_no_tag = Fabricate(:topic)
    Fabricate(:topic_tag, tag: tag, topic: topic)

    post = Fabricate(:post, topic: topic, user: topic.user, raw: 'This is my hello')
    # Post in an untagged topic, expected to be excluded.
    Fabricate(:post, topic: topic_no_tag, user: topic.user)

    search = Search.execute('hello', search_context: tag)

    expect(search.posts.map(&:id)).to contain_exactly(post.id)
    expect(search.posts.length).to eq(1)
  end
end
2014-06-26 07:58:49 +08:00
# Covers Search.prepare_data on mixed English/Chinese text (query mode and
# :index mode) and end-to-end title search under Chinese locales.
describe 'Chinese search' do
  let(:sentence) { 'Discourse中国的基础设施网络正在组装' }
  let(:sentence_t) { 'Discourse太平山森林遊樂區' }

  it 'splits English / Chinese and filter out stop words' do
    SiteSetting.default_locale = 'zh_CN'
    data = Search.prepare_data(sentence).split(' ')
    expect(data).to eq(["Discourse", "中国", "基础", "设施", "基础设施", "网络", "正在", "组装"])
  end

  it 'splits for indexing and filter out stop words' do
    SiteSetting.default_locale = 'zh_CN'
    data = Search.prepare_data(sentence, :index).split(' ')
    expect(data).to eq(["Discourse", "中国", "基础设施", "网络", "正在", "组装"])
  end

  it 'splits English / Traditional Chinese and filter out stop words' do
    SiteSetting.default_locale = 'zh_TW'
    data = Search.prepare_data(sentence_t).split(' ')
    expect(data).to eq(["Discourse", "太平", "平山", "太平山", "森林", "遊樂區"])
  end

  # Description made unique: it previously duplicated the zh_CN indexing
  # example above, which makes failures ambiguous in the report.
  it 'splits Traditional Chinese for indexing and filter out stop words' do
    SiteSetting.default_locale = 'zh_TW'
    data = Search.prepare_data(sentence_t, :index).split(' ')
    expect(data).to eq(["Discourse", "太平山", "森林", "遊樂區"])
  end

  it 'finds chinese topic based on title' do
    skip("skipped until pg app installs the db correctly") if RbConfig::CONFIG["arch"] =~ /darwin/

    SiteSetting.default_locale = 'zh_TW'
    # Allow short search terms such as the two-character query below.
    SiteSetting.min_search_term_length = 1
    topic = Fabricate(:topic, title: 'My Title Discourse社區指南')
    post = Fabricate(:post, topic: topic)

    expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
    expect(Search.execute('指南').posts.first.id).to eq(post.id)
  end

  it 'finds chinese topic based on title if tokenization is forced' do
    skip("skipped until pg app installs the db correctly") if RbConfig::CONFIG["arch"] =~ /darwin/

    # No Chinese locale is set here; tokenization is forced via the setting.
    SiteSetting.search_tokenize_chinese_japanese_korean = true
    SiteSetting.min_search_term_length = 1

    topic = Fabricate(:topic, title: 'My Title Discourse社區指南')
    post = Fabricate(:post, topic: topic)

    expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
    expect(Search.execute('指南').posts.first.id).to eq(post.id)
  end
end
2014-09-03 19:54:10 +08:00
describe 'Advanced search' do
2015-06-23 11:21:50 +08:00
2020-05-29 03:17:28 +08:00
# `in:pinned` keeps only posts from pinned topics; the filter keyword is
# matched case-insensitively.
it 'supports pinned' do
  topic = Fabricate(:topic)
  Fabricate(:post, raw: 'hi this is a test 123 123', topic: topic)
  Fabricate(:post, raw: 'boom boom shake the room', topic: topic)

  topic.update_pinned(true)

  expect(Search.execute('boom in:pinned').posts.length).to eq(1)
  expect(Search.execute('boom IN:PINNED').posts.length).to eq(1)
end
2016-05-13 16:26:53 +08:00
# `in:wiki` narrows results to wiki posts; both posts share the same raw text
# so only the wiki flag distinguishes them.
it 'supports wiki' do
  topic = Fabricate(:topic)
  topic_2 = Fabricate(:topic)
  post = Fabricate(:post, raw: 'this is a test 248', wiki: true, topic: topic)
  Fabricate(:post, raw: 'this is a test 248', wiki: false, topic: topic_2)

  expect(Search.execute('test 248').posts.length).to eq(2)
  expect(Search.execute('test 248 in:wiki').posts.first).to eq(post)
  expect(Search.execute('test 248 IN:WIKI').posts.first).to eq(post)
end
# `in:seen` / `in:unseen` filter by whether the guardian's user has a
# PostTiming for the post. Without a guardian the filter has no effect.
it 'supports searching for posts that the user has seen/unseen' do
  topic = Fabricate(:topic)
  topic_2 = Fabricate(:topic)
  post = Fabricate(:post, raw: 'logan is longan', topic: topic)
  post_2 = Fabricate(:post, raw: 'longan is logan', topic: topic_2)

  # Record a timing for `post` on behalf of its author and the topic owner.
  [post.user, topic.user].each do |user|
    PostTiming.create!(
      post_number: post.post_number,
      topic: topic,
      user: user,
      msecs: 1
    )
  end

  expect(post.seen?(post.user)).to eq(true)

  expect(Search.execute('longan').posts.sort).to eq([post, post_2])

  expect(Search.execute('longan in:seen', guardian: Guardian.new(post.user)).posts)
    .to eq([post])

  expect(Search.execute('longan IN:SEEN', guardian: Guardian.new(post.user)).posts)
    .to eq([post])

  # Anonymous search: the seen filter is ignored.
  expect(Search.execute('longan in:seen').posts.sort).to eq([post, post_2])

  expect(Search.execute('longan in:seen', guardian: Guardian.new(post_2.user)).posts)
    .to eq([])

  expect(Search.execute('longan', guardian: Guardian.new(post_2.user)).posts.sort)
    .to eq([post, post_2])

  expect(Search.execute('longan in:unseen', guardian: Guardian.new(post_2.user)).posts.sort)
    .to eq([post, post_2])

  expect(Search.execute('longan in:unseen', guardian: Guardian.new(post.user)).posts)
    .to eq([post_2])

  expect(Search.execute('longan IN:UNSEEN', guardian: Guardian.new(post.user)).posts)
    .to eq([post_2])
end
2020-08-21 15:49:26 +08:00
# `before:` / `after:` date filters. Time is frozen at 2001-05-20 02:55 so the
# relative and named-date forms resolve deterministically; post_1 is created
# two months before that instant, post_2 at the frozen time.
it 'supports before and after filters' do
  time = Time.zone.parse('2001-05-20 2:55')
  freeze_time(time)

  post_1 = Fabricate(:post, raw: 'hi this is a test 123 123', created_at: time.months_ago(2))
  post_2 = Fabricate(:post, raw: 'boom boom shake the room test')

  expect(Search.execute('test before:1').posts).to contain_exactly(post_1)
  expect(Search.execute('test before:2001-04-20').posts).to contain_exactly(post_1)
  expect(Search.execute('test before:2001').posts).to eq([])
  expect(Search.execute('test after:2001').posts).to contain_exactly(post_1, post_2)
  expect(Search.execute('test before:monday').posts).to contain_exactly(post_1)
  expect(Search.execute('test after:jan').posts).to contain_exactly(post_1, post_2)
end
2016-03-14 20:27:02 +08:00
2020-08-21 15:49:26 +08:00
# `in:first` (and its `f` shortcut) keeps only first posts of topics;
# `user:` accepts a username or a user id; `@username` is an alias.
it 'supports in:first, user:, @username' do
  topic = Fabricate(:topic)
  post_1 = Fabricate(:post, raw: 'hi this is a test 123 123', topic: topic)
  post_2 = Fabricate(:post, raw: 'boom boom shake the room test', topic: topic)

  expect(Search.execute('test in:first').posts).to contain_exactly(post_1)
  expect(Search.execute('test IN:FIRST').posts).to contain_exactly(post_1)

  expect(Search.execute('boom').posts).to contain_exactly(post_2)

  # post_2 is a reply, so restricting to first posts yields nothing.
  expect(Search.execute('boom in:first').posts).to eq([])
  expect(Search.execute('boom f').posts).to eq([])

  expect(Search.execute('123 in:first').posts).to contain_exactly(post_1)
  expect(Search.execute('123 f').posts).to contain_exactly(post_1)

  expect(Search.execute('user:nobody').posts).to eq([])
  expect(Search.execute("user:#{post_1.user.username}").posts).to contain_exactly(post_1)
  expect(Search.execute("user:#{post_1.user_id}").posts).to contain_exactly(post_1)

  expect(Search.execute("@#{post_1.user.username}").posts).to contain_exactly(post_1)
end
2015-09-15 15:39:14 +08:00
# `group:` returns posts written by members of the named group; the group
# name is matched regardless of case.
it 'supports group' do
  topic = Fabricate(:topic, created_at: 3.months.ago)
  post = Fabricate(:post, raw: 'hi this is a test 123 123', topic: topic)

  group = Group.create!(name: "Like_a_Boss")
  GroupUser.create!(user_id: post.user_id, group_id: group.id)

  expect(Search.execute('group:like_a_boss').posts.length).to eq(1)
  expect(Search.execute('group:"like a brick"').posts.length).to eq(0)
end
2015-09-15 16:47:27 +08:00
# `badge:` returns posts written by users holding the named badge; keyword and
# badge name are matched regardless of case.
it 'supports badge' do
  topic = Fabricate(:topic, created_at: 3.months.ago)
  post = Fabricate(:post, raw: 'hi this is a test 123 123', topic: topic)

  badge = Badge.create!(name: "Like a Boss", badge_type_id: 1)
  UserBadge.create!(user_id: post.user_id, badge_id: badge.id, granted_at: 1.minute.ago, granted_by_id: -1)

  expect(Search.execute('badge:"like a boss"').posts.length).to eq(1)
  expect(Search.execute('BADGE:"LIKE A BOSS"').posts.length).to eq(1)
  expect(Search.execute('badge:"test"').posts.length).to eq(0)
end
2020-07-08 15:29:02 +08:00
# A double-quoted query must match the phrase verbatim (including URLs),
# whereas the same words unquoted match any post containing them.
it 'can match exact phrases' do
  post = Fabricate(:post, raw: %{this is a test post with 'a URL https://some.site.com/search?q=test.test.test some random text I have to add})
  post2 = Fabricate(:post, raw: 'test URL post with')

  # Unquoted: both posts contain the words. The query previously carried a
  # stray `).posts` suffix inside the string literal, which has been removed.
  expect(Search.execute("test post with 'a URL").posts).to eq([post2, post])
  expect(Search.execute(%{"test post with 'a URL"}).posts).to eq([post])
  expect(Search.execute(%{"https://some.site.com/search?q=test.test.test"}).posts).to eq([post])
  expect(Search.execute(%{" with 'a URL https://some.site.com/search?q=test.test.test"}).posts).to eq([post])
end
2015-08-10 15:41:14 +08:00
# Numeric terms such as "3.0" are searchable; only double quotes (not single
# quotes) trigger exact-phrase matching.
it 'can search numbers correctly, and match exact phrases' do
  post = Fabricate(:post, raw: '3.0 eta is in 2 days horrah')
  post2 = Fabricate(:post, raw: '3.0 is eta in 2 days horrah')

  expect(Search.execute('3.0 eta').posts).to eq([post, post2])
  expect(Search.execute("'3.0 eta'").posts).to eq([post, post2])
  expect(Search.execute("\"3.0 eta\"").posts).to contain_exactly(post)
  expect(Search.execute('"3.0, eta is"').posts).to eq([])
end
2014-09-03 19:54:10 +08:00
# Walks two topics (one public, one initially in a read-restricted category)
# through several states and checks the `status:`, post-count, `in:` and
# `created:` filters at each step. Statement order matters: each expectation
# depends on the updates performed before it.
it 'can find by status' do
  public_category = Fabricate(:category, read_restricted: false)
  post = Fabricate(:post, raw: 'hi this is a test 123 123')
  topic = post.topic
  topic.update(category: public_category)

  private_category = Fabricate(:category, read_restricted: true)
  post2 = Fabricate(:post, raw: 'hi this is another test 123 123')
  second_topic = post2.topic
  second_topic.update(category: private_category)

  # A reply by the first topic's author inside the second topic.
  _post3 = Fabricate(:post, raw: "another test!", user: topic.user, topic: second_topic)

  expect(Search.execute('test status:public').posts.length).to eq(1)
  expect(Search.execute('test status:closed').posts.length).to eq(0)
  expect(Search.execute('test status:open').posts.length).to eq(1)
  expect(Search.execute('test STATUS:OPEN').posts.length).to eq(1)
  expect(Search.execute('test posts_count:1').posts.length).to eq(1)
  expect(Search.execute('test min_post_count:1').posts.length).to eq(1)

  # Close the first topic and make the second one public.
  topic.update(closed: true)
  second_topic.update(category: public_category)

  expect(Search.execute('test status:public').posts.length).to eq(2)
  expect(Search.execute('test status:closed').posts.length).to eq(1)
  expect(Search.execute('status:closed').posts.length).to eq(1)
  expect(Search.execute('test status:open').posts.length).to eq(1)

  # Archive (and reopen) the first topic, close the second.
  topic.update(archived: true, closed: false)
  second_topic.update(closed: true)

  expect(Search.execute('test status:archived').posts.length).to eq(1)
  expect(Search.execute('test status:open').posts.length).to eq(0)

  expect(Search.execute('test status:noreplies').posts.length).to eq(1)

  expect(Search.execute('test in:likes', guardian: Guardian.new(topic.user)).posts.length).to eq(0)

  expect(Search.execute('test in:posted', guardian: Guardian.new(topic.user)).posts.length).to eq(2)
  expect(Search.execute('test In:PoStEd', guardian: Guardian.new(topic.user)).posts.length).to eq(2)

  # `in:created` must be equivalent to `created:@<current user>`.
  in_created = Search.execute('test in:created', guardian: Guardian.new(topic.user)).posts
  created_by_user = Search.execute("test created:@#{topic.user.username}", guardian: Guardian.new(topic.user)).posts
  expect(in_created.length).to eq(1)
  expect(created_by_user.length).to eq(1)
  expect(in_created).to eq(created_by_user)

  expect(Search.execute("test created:@#{second_topic.user.username}", guardian: Guardian.new(topic.user)).posts.length).to eq(1)

  new_user = Fabricate(:user)
  expect(Search.execute("test created:@#{new_user.username}", guardian: Guardian.new(topic.user)).posts.length).to eq(0)

  TopicUser.change(topic.user.id, topic.id, notification_level: TopicUser.notification_levels[:tracking])
  expect(Search.execute('test in:watching', guardian: Guardian.new(topic.user)).posts.length).to eq(0)
  expect(Search.execute('test in:tracking', guardian: Guardian.new(topic.user)).posts.length).to eq(1)
end
2017-06-06 20:39:53 +08:00
# The `with:images` filter must return only the post fabricated with an
# uploaded image, not the plain post created alongside it.
# NOTE(review): the date stamps and the "FEATURE: ..." text inside this example
# are blame/commit-message residue, not Ruby; they should be removed upstream.
it 'can find posts with images' do
post_uploaded = Fabricate ( :post_with_uploaded_image )
Fabricate ( :post )
2017-06-08 02:13:36 +08:00
# presumably populates the post's image data that `with:images` filters on — confirm
CookedPostProcessor . new ( post_uploaded ) . update_post_image
2017-06-06 20:39:53 +08:00
FEATURE: Include optimized thumbnails for topics (#9215)
This introduces new APIs for obtaining optimized thumbnails for topics. There are a few building blocks required for this:
- Introduces new `image_upload_id` columns on the `posts` and `topics` table. This replaces the old `image_url` column, which means that thumbnails are now restricted to uploads. Hotlinked thumbnails are no longer possible. In normal use (with pull_hotlinked_images enabled), this has no noticeable impact
- A migration attempts to match existing urls to upload records. If a match cannot be found then the posts will be queued for rebake
- Optimized thumbnails are generated during post_process_cooked. If thumbnails are missing when serializing a topic list, then a sidekiq job is queued
- Topic lists and topics now include a `thumbnails` key, which includes all the available images:
```
"thumbnails": [
{
"max_width": null,
"max_height": null,
"url": "//example.com/original-image.png",
"width": 1380,
"height": 1840
},
{
"max_width": 1024,
"max_height": 1024,
"url": "//example.com/optimized-image.png",
"width": 768,
"height": 1024
}
]
```
- Themes can request additional thumbnail sizes by using a modifier in their `about.json` file:
```
"modifiers": {
"topic_thumbnail_sizes": [
[200, 200],
[800, 800]
],
...
```
Remember that these are generated asynchronously, so your theme should include logic to fallback to other available thumbnails if your requested size has not yet been generated
- Two new raw plugin outlets are introduced, to improve the customisability of the topic list. `topic-list-before-columns` and `topic-list-before-link`
2020-05-05 16:07:50 +08:00
expect ( Search . execute ( 'with:images' ) . posts . map ( & :id ) ) . to contain_exactly ( post_uploaded . id )
2017-06-06 20:39:53 +08:00
end
2014-09-03 20:10:18 +08:00
# Default ordering puts post1 first; `order:latest` and its `l` shortcut (in
# either position) order by recency instead, putting the newer post2 first.
it 'can find by latest' do
  topic1 = Fabricate(:topic, title: 'I do not like that Sam I am')
  post1 = Fabricate(:post, topic: topic1, created_at: 10.minutes.ago)
  post2 = Fabricate(:post, raw: 'that Sam I am, that Sam I am', created_at: 5.minutes.ago)

  expect(Search.execute('sam').posts.map(&:id)).to eq([post1.id, post2.id])
  expect(Search.execute('sam ORDER:LATEST').posts.map(&:id)).to eq([post2.id, post1.id])
  expect(Search.execute('sam l').posts.map(&:id)).to eq([post2.id, post1.id])
  expect(Search.execute('l sam').posts.map(&:id)).to eq([post2.id, post1.id])
end
# `order:latest_topic` sorts by the creation time of the topic rather than by
# relevance or by post date. The category definition topic (created last)
# therefore comes first, and the oldest topic's post comes last.
it 'can order by topic creation' do
  today = Date.today
  yesterday = 1.day.ago
  two_days_ago = 2.days.ago
  category = Fabricate(:category_with_definition)

  old_topic = Fabricate(:topic,
    title: 'First Topic, testing the created_at sort',
    created_at: two_days_ago,
    category: category
  )

  latest_topic = Fabricate(:topic,
    title: 'Second Topic, testing the created_at sort',
    created_at: yesterday,
    category: category
  )

  old_relevant_topic_post = Fabricate(:post,
    topic: old_topic,
    created_at: yesterday,
    raw: 'Relevant Relevant Topic'
  )

  latest_irrelevant_topic_post = Fabricate(:post,
    topic: latest_topic,
    created_at: today,
    raw: 'Not Relevant'
  )

  # Expecting the default results
  expect(Search.execute('Topic').posts.map(&:id)).to eq([
    old_relevant_topic_post.id,
    latest_irrelevant_topic_post.id,
    category.topic.first_post.id
  ])

  # Expecting the ordered by topic creation results
  expect(Search.execute('Topic order:latest_topic').posts.map(&:id)).to eq([
    category.topic.first_post.id,
    latest_irrelevant_topic_post.id,
    old_relevant_topic_post.id
  ])
end
2016-05-11 17:53:54 +08:00
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` for
`post_number` during aggregation meaning that the ranking is not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
# `order:views` sorts results by the topic's view count, highest first.
it 'can order by topic views' do
  topic = Fabricate(:topic, views: 1)
  topic2 = Fabricate(:topic, views: 2)
  post = Fabricate(:post, raw: 'Topic', topic: topic)
  post2 = Fabricate(:post, raw: 'Topic', topic: topic2)

  expect(Search.execute('Topic order:views').posts.map(&:id)).to eq([
    post2.id,
    post.id
  ])
end
2020-09-24 17:52:04 +08:00
# `min_view_count:` / `max_view_count:` bound results by the topic's views.
it 'can filter by topic views' do
  topic = Fabricate(:topic, views: 100)
  topic2 = Fabricate(:topic, views: 200)
  post = Fabricate(:post, raw: 'Topic', topic: topic)
  post2 = Fabricate(:post, raw: 'Topic', topic: topic2)

  expect(Search.execute('Topic min_view_count:150').posts.map(&:id)).to eq([post2.id])
  expect(Search.execute('Topic max_view_count:150').posts.map(&:id)).to eq([post.id])
end
2020-07-09 14:56:02 +08:00
# Each dot-separated segment of a word must be findable on its own.
it 'can search for terms with dots' do
  post = Fabricate(:post, raw: 'Will.2000 Will.Bob.Bill...')

  expect(Search.execute('bill').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('bob').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('2000').posts.map(&:id)).to eq([post.id])
end
2020-07-08 15:29:02 +08:00
# A post containing a URL must be found by the full URL as well as by partial
# host, domain-suffix and path fragments of it.
it 'can search URLS correctly' do
  post = Fabricate(:post, raw: 'i like http://wb.camra.org.uk/latest#test so yay')

  expect(Search.execute('http://wb.camra.org.uk/latest#test').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('camra').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('http://wb').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('wb.camra').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('wb.camra.org').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('org.uk').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('camra.org.uk').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('wb.camra.org.uk/latest').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('/latest#test').posts.map(&:id)).to eq([post.id])
end
2016-06-02 20:41:24 +08:00
# Covers the `#slug` and `category:` filters (case-insensitive; a leading `=`
# excludes subcategories; `parent:child` targets a subcategory) and the
# `#tag` filter, including non-ASCII and mixed-case tag names.
it 'supports category slug and tags' do
  # main category
  category = Fabricate(:category_with_definition, name: 'category 24', slug: 'cateGory-24')
  topic = Fabricate(:topic, created_at: 3.months.ago, category: category)
  post = Fabricate(:post, raw: 'Sams first post', topic: topic)

  expect(Search.execute('sams post #categoRy-24').posts.length).to eq(1)
  expect(Search.execute("sams post category:#{category.id}").posts.length).to eq(1)
  expect(Search.execute('sams post #categoRy-25').posts.length).to eq(0)

  sub_category = Fabricate(:category_with_definition, name: 'sub category', slug: 'sub-category', parent_category_id: category.id)
  second_topic = Fabricate(:topic, created_at: 3.months.ago, category: sub_category)
  Fabricate(:post, raw: 'sams second post', topic: second_topic)

  # Without `=` the subcategory's post is included; with `=` it is not.
  expect(Search.execute("sams post category:categoRY-24").posts.length).to eq(2)
  expect(Search.execute("sams post category:=cAtegory-24").posts.length).to eq(1)

  expect(Search.execute("sams post #category-24").posts.length).to eq(2)
  expect(Search.execute("sams post #=category-24").posts.length).to eq(1)
  expect(Search.execute("sams post #sub-category").posts.length).to eq(1)

  expect(Search.execute("sams post #categoRY-24:SUB-category").posts.length)
    .to eq(1)

  # tags
  topic.tags = [Fabricate(:tag, name: 'alpha'), Fabricate(:tag, name: 'привет'), Fabricate(:tag, name: 'HeLlO')]
  expect(Search.execute('this is a test #alpha').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('this is a test #привет').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('this is a test #hElLo').posts.map(&:id)).to eq([post.id])
  expect(Search.execute('this is a test #beta').posts.size).to eq(0)
end
2016-05-05 02:02:47 +08:00
2020-03-20 12:36:50 +08:00
# With three levels of category nesting enabled, `#middle:leaf` must resolve
# to the third-level category.
it 'supports sub-sub category slugs' do
  SiteSetting.max_category_nesting = 3

  category = Fabricate(:category, name: 'top', slug: 'top')
  sub = Fabricate(:category, name: 'middle', slug: 'middle', parent_category_id: category.id)
  leaf = Fabricate(:category, name: 'leaf', slug: 'leaf', parent_category_id: sub.id)

  topic = Fabricate(:topic, created_at: 3.months.ago, category: leaf)
  _post = Fabricate(:post, raw: 'Sams first post', topic: topic)

  expect(Search.execute('#Middle:leaf first post').posts.size).to eq(1)
end
2017-08-02 06:15:04 +08:00
# When `#something` matches neither a tag nor a category it is treated as a
# plain search term instead of an (always empty) filter.
it 'correctly handles #symbol when no tag or category match' do
  Fabricate(:post, raw: 'testing #1 #9998')

  results = Search.new('testing #1').execute
  expect(results.posts.length).to eq(1)

  results = Search.new('#9998').execute
  expect(results.posts.length).to eq(1)

  results = Search.new('#777').execute
  expect(results.posts.length).to eq(0)

  # Degenerate `#:` input must not raise and must match nothing.
  results = Search.new('xxx #:').execute
  expect(results.posts.length).to eq(0)
end
2017-05-30 19:01:17 +08:00
context 'tags' do
2019-06-27 15:53:26 +08:00
# Shared fixtures: three named tags, a tag group containing tag1 ('lunch') and
# tag3 ('sandwiches'), and five topics carrying different tag combinations.
fab!(:tag1) { Fabricate(:tag, name: 'lunch') }
fab!(:tag2) { Fabricate(:tag, name: 'eggs') }
fab!(:tag3) { Fabricate(:tag, name: 'sandwiches') }

fab!(:tag_group) do
  group = TagGroup.create!(name: 'mid day')
  TagGroupMembership.create!(tag_id: tag1.id, tag_group_id: group.id)
  TagGroupMembership.create!(tag_id: tag3.id, tag_group_id: group.id)
  group
end

fab!(:topic1) { Fabricate(:topic, tags: [tag2, Fabricate(:tag)]) }
fab!(:topic2) { Fabricate(:topic, tags: [tag2]) }
fab!(:topic3) { Fabricate(:topic, tags: [tag1, tag2]) }
fab!(:topic4) { Fabricate(:topic, tags: [tag1, tag2, tag3]) }
fab!(:topic5) { Fabricate(:topic, tags: [tag2, tag3]) }
# Fabricates a post with the given arguments after calling SearchIndexer.enable.
# NOTE(review): presumably needed because the fab! fixtures using this helper
# are built before the per-example `before` hook enables the indexer - confirm.
def indexed_post(*args)
  SearchIndexer.enable
  fabricate_arguments = [:post, *args]
  Fabricate(*fabricate_arguments)
end
# One post per fixture topic, each created through `indexed_post`.
fab!(:post1) do
  indexed_post(topic: topic1)
end
fab!(:post2) do
  indexed_post(topic: topic2)
end
fab!(:post3) do
  indexed_post(topic: topic3)
end
fab!(:post4) do
  indexed_post(topic: topic4)
end
fab!(:post5) do
  indexed_post(topic: topic5)
end
# '#mid-day' is expected to return the posts of the topics tagged with a
# member of the 'mid day' tag group (topic3, topic4, topic5), in the order
# post5, post4, post3.
it 'can find posts by tag group' do
  found_ids = Search.execute('#mid-day').posts.map(&:id)
  expected_ids = [post5, post4, post3].map(&:id)

  expect(found_ids).to eq(expected_ids)
end
2017-05-30 19:01:17 +08:00
2017-05-31 23:32:29 +08:00
# A term combined with a `tags:` filter returns only the matching post in a
# topic carrying that tag; an unknown tag returns nothing.
it 'can find posts with tag' do
  # Named `green_post` rather than `post4` so the local does not shadow the
  # `post4` fixture defined for this context.
  green_post = Fabricate(:post, topic: topic3, raw: " It probably doesn't help that they're green... ")

  expect(Search.execute('green tags:eggs').posts.map(&:id)).to eq([green_post.id])
  expect(Search.execute('tags:plants').posts.size).to eq(0)
end
2018-08-27 09:05:28 +08:00
# A `tags:` filter must match a tag whose name is written in a non-latin script.
it 'can find posts with non-latin tag' do
  tagged_topic = Fabricate(:topic)
  non_latin_tag = Fabricate(:tag, name: 'さようなら')
  tagged_topic.tags = [non_latin_tag]
  tagged_post = Fabricate(:post, raw: 'Testing post', topic: tagged_topic)

  found_ids = Search.execute('tags:さようなら').posts.map(&:id)
  expect(found_ids).to eq([tagged_post.id])
end
2020-08-25 10:20:45 +08:00
# Same check as the non-latin example, with a Thai tag name.
it 'can find posts with thai tag' do
  tagged_topic = Fabricate(:topic)
  thai_tag = Fabricate(:tag, name: 'เรซิ่น')
  tagged_topic.tags = [thai_tag]
  tagged_post = Fabricate(:post, raw: 'Testing post', topic: tagged_topic)

  found_ids = Search.execute('tags:เรซิ่น').posts.map(&:id)
  expect(found_ids).to eq([tagged_post.id])
end
2017-05-30 19:01:17 +08:00
# Comma-separated tags act as "any of": every fixture post is expected back.
# Both sides are sorted, so result order is not asserted here.
it 'can find posts with any tag from multiple tags' do
  found_ids = Search.execute('tags:eggs,lunch').posts.map(&:id)
  all_fixture_ids = [post1.id, post2.id, post3.id, post4.id, post5.id]

  expect(found_ids.sort).to eq(all_fixture_ids.sort)
end
2016-05-05 02:02:47 +08:00
2017-05-30 19:01:17 +08:00
# `+`-joined tags act as "all of": only post4 (topic4 carries lunch, eggs and
# sandwiches) is expected, whatever order the tags are listed in.
it 'can find posts which contains all provided tags' do
  # The expected value is a one-element array, so it is compared directly
  # (sorting a single element was a no-op).
  expect(Search.execute('tags:lunch+eggs+sandwiches').posts.map(&:id)).to eq([post4.id])
  expect(Search.execute('tags:eggs+lunch+sandwiches').posts.map(&:id)).to eq([post4.id])
end
# `-tags:` excludes topics by tag; the expectations also pin the result order.
it 'can find posts which contains provided tags and does not contain selected ones' do
  ids_for = ->(query) { Search.execute(query).posts.map(&:id) }

  # Single excluded tag: the topics tagged lunch (topic3, topic4) drop out.
  expect(ids_for.call('tags:eggs -tags:lunch'))
    .to eq([post5, post2, post1].map(&:id))

  # `+` in the exclusion: only the topic carrying both lunch and sandwiches (topic4) drops out.
  expect(ids_for.call('tags:eggs -tags:lunch+sandwiches'))
    .to eq([post5, post3, post2, post1].map(&:id))

  # `,` in the exclusion: topics carrying lunch or sandwiches (topic3, topic4, topic5) drop out.
  expect(ids_for.call('tags:eggs -tags:lunch,sandwiches'))
    .to eq([post2, post1].map(&:id))
end
2018-09-28 17:27:08 +08:00
it 'orders posts correctly when combining tags with categories or terms' do
2019-08-06 18:26:54 +08:00
cat1 = Fabricate ( :category_with_definition , name : 'food' )
2018-09-28 17:27:08 +08:00
topic6 = Fabricate ( :topic , tags : [ tag1 , tag2 ] , category : cat1 )
topic7 = Fabricate ( :topic , tags : [ tag1 , tag2 , tag3 ] , category : cat1 )
2020-03-11 05:13:17 +08:00
post7 = Fabricate ( :post , topic : topic6 , raw : " Wakey, wakey, eggs and bakey. " , like_count : 5 , created_at : 2 . minutes . ago )
post8 = Fabricate ( :post , topic : topic7 , raw : " Bakey, bakey, eggs to makey. " , like_count : 2 , created_at : 1 . minute . ago )
2018-09-28 17:27:08 +08:00
expect ( Search . execute ( 'bakey tags:lunch order:latest' ) . posts . map ( & :id ) )
. to eq ( [ post8 . id , post7 . id ] )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` of
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
2018-09-28 17:27:08 +08:00
expect ( Search . execute ( '#food tags:lunch order:latest' ) . posts . map ( & :id ) )
. to eq ( [ post8 . id , post7 . id ] )
FIX: Ensure that aggregating search shows the post with the highest rank.
Previously, we would only take either the `MIN` or `MAX` of
`post_number` during aggregation, meaning that the ranking was not
considered.
```
require 'benchmark/ips'
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("current aggregate search query") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX((
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.report("current aggregate search query with proper ranking") do
DB.exec <<~SQL
SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", 
"posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", (
TS_RANK_CD(
post_search_data.search_data,
TO_TSQUERY('english', '''postgres'':*ABCD'),
1|32
) *
(
CASE categories.search_priority
WHEN 2
THEN 0.6
WHEN 3
THEN 0.8
WHEN 4
THEN 1.2
WHEN 5
THEN 1.4
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
)
rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN (
SELECT categories.id WHERE categories.search_priority = 1
)
) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number;
SQL
end
x.compare!
end
```
```
Warming up --------------------------------------
current aggregate search query
1.000 i/100ms
current aggregate search query with proper ranking
1.000 i/100ms
Calculating -------------------------------------
current aggregate search query
17.726 (± 0.0%) i/s - 178.000 in 10.045107s
current aggregate search query with proper ranking
17.802 (± 0.0%) i/s - 178.000 in 10.002230s
Comparison:
current aggregate search query with proper ranking: 17.8 i/s
current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower
```
2020-07-07 15:36:57 +08:00
2018-09-28 17:27:08 +08:00
expect ( Search . execute ( '#food tags:lunch order:likes' ) . posts . map ( & :id ) )
. to eq ( [ post7 . id , post8 . id ] )
end
2016-05-05 02:02:47 +08:00
end
2017-05-30 19:01:17 +08:00
2017-06-21 03:21:56 +08:00
# `filetype:` must match posts by the extension of links extracted from the
# post as well as by attached uploads, and must combine with a plain term.
it " can find posts which contains filetypes " do
  png_link_post = Fabricate(:post, raw: " http://example.com/image.png ")
  logo_post = Fabricate(
    :post,
    raw: " Discourse logo \n " \
      " http://example.com/logo.png \n " \
      " http://example.com/vector_image.svg "
  )
  # Expected to show up for filetype:png through its upload.
  upload_post = Fabricate(:post, uploads: [Fabricate(:upload)])
  # Extra post that none of the queries below should return.
  Fabricate(:post)

  [png_link_post, logo_post].each { |linked_post| TopicLink.extract_from(linked_post) }

  expect(Search.execute('filetype:svg').posts).to eq([logo_post])

  png_result_ids = Search.execute('filetype:png').posts.map(&:id)
  expect(png_result_ids).to eq([upload_post, logo_post, png_link_post].map(&:id))

  expect(Search.execute('logo filetype:png').posts).to eq([logo_post])
end
2014-09-03 19:54:10 +08:00
end
2018-11-22 05:07:13 +08:00
# Checks that Search.ts_query turns awkward user input (colons, brackets, a URL,
# quotes, slashes, ampersands) into text that can be appended to a SQL statement
# and executed without raising.
context '#ts_query' do
it 'can parse complex strings using ts_query helper' do
2019-04-30 08:27:42 +08:00
# Unary + yields an unfrozen string (the file enables frozen_string_literal),
# which the `<<` on the next statement requires.
str = + " grigio:babel deprecated? "
2018-11-22 05:07:13 +08:00
str << " page page on Atmosphere](https://atmospherejs.com/grigio/babel)xxx: aaa.js:222 aaa' \" bbb "
2015-08-13 15:55:10 +08:00
2018-11-22 05:07:13 +08:00
ts_query = Search . ts_query ( term : str , ts_config : " simple " )
2019-04-30 08:27:42 +08:00
# The generated query text is appended to a SELECT and run; it must not raise.
expect { DB . exec ( + " SELECT to_tsvector('bbb') @@ " << ts_query ) } . to_not raise_error
2018-11-22 05:07:13 +08:00
# Second input: dots, a slash, a quote and an ampersand in a single term.
ts_query = Search . ts_query ( term : " foo.bar/'&baz " , ts_config : " simple " )
2019-04-30 08:27:42 +08:00
expect { DB . exec ( + " SELECT to_tsvector('bbb') @@ " << ts_query ) } . to_not raise_error
2018-11-22 05:07:13 +08:00
# The trailing word must still be present in the generated query text.
expect ( ts_query ) . to include ( " baz " )
end
2015-08-13 15:55:10 +08:00
end
2016-03-14 20:27:02 +08:00
# Search.word_to_date converts a date word or date string into a time, or
# returns nil when the input cannot be interpreted.
context '#word_to_date' do
  it 'parses relative dates correctly' do
    time = Time.zone.parse('2001-02-20 2:55')
    freeze_time(time)

    # Weekday and month names resolve to dates before the frozen time and are
    # matched regardless of letter case; a bare number is a count of days ago.
    expected_by_word = {
      'yesterday' => time.beginning_of_day.yesterday,
      'suNday' => Time.zone.parse('2001-02-18'),
      'thursday' => Time.zone.parse('2001-02-15'),
      'deCember' => Time.zone.parse('2000-12-01'),
      'deC' => Time.zone.parse('2000-12-01'),
      'january' => Time.zone.parse('2001-01-01'),
      'jan' => Time.zone.parse('2001-01-01'),
      '100' => time.beginning_of_day.days_ago(100),
      'invalid' => nil
    }

    expected_by_word.each do |word, expected_time|
      expect(Search.word_to_date(word)).to eq(expected_time)
    end
  end

  it 'parses absolute dates correctly' do
    # Missing month/day parts default to the first; an impossible day and a
    # five-digit number both yield nil.
    expected_by_word = {
      '2001-1-20' => Time.zone.parse('2001-01-20'),
      '2030-10-2' => Time.zone.parse('2030-10-02'),
      '2030-10' => Time.zone.parse('2030-10-01'),
      '2030' => Time.zone.parse('2030-01-01'),
      '2030-01-32' => nil,
      '10000' => nil
    }

    expected_by_word.each do |word, expected_time|
      expect(Search.word_to_date(word)).to eq(expected_time)
    end
  end
end
2016-08-11 03:40:58 +08:00
# Search.min_post_id_no_cache depends on the search_prefer_recent_posts and
# search_recent_posts_size site settings.
context " # min_post_id " do
  it " returns 0 when prefer_recent_posts is disabled " do
    SiteSetting.search_prefer_recent_posts = false

    expect(Search.min_post_id_no_cache).to eq(0)
  end

  it " returns a value when prefer_recent_posts is enabled " do
    SiteSetting.search_prefer_recent_posts = true
    SiteSetting.search_recent_posts_size = 1

    # With a recent-posts size of 1, the id of the later of the two posts is expected.
    _older_post, newest_post = Array.new(2) { Fabricate(:post) }

    expect(Search.min_post_id_no_cache).to eq(newest_post.id)
  end
end
2017-07-17 23:57:13 +08:00
# The result exposes a search_log_id only when a search_type was passed to
# Search.new.
context " search_log_id " do
  it " returns an id when the search succeeds " do
    logged_search = Search.new('indiana jones', search_type: :header, ip_address: '127.0.0.1')

    expect(logged_search.execute.search_log_id).to be_present
  end

  it " does not log search if search_type is not present " do
    unlogged_search = Search.new('foo bar', ip_address: '127.0.0.1')

    expect(unlogged_search.execute.search_log_id).not_to be_present
  end
end
2018-02-20 11:41:00 +08:00
# `in:title` restricts term matching to topic titles.
context 'in:title' do
  it 'allows for search in title' do
    topic = Fabricate(:topic, title: 'I am testing a title search')
    # The second post is fabricated before the first one and never referenced
    # again, hence the underscore prefix this file uses for unused locals.
    _post2 = Fabricate(:post, topic: topic, raw: 'this is the second post', post_number: 2)
    post = Fabricate(:post, topic: topic, raw: 'this is the first post', post_number: 1)

    # A title match is reported through the first post only.
    results = Search.execute('title in:title')
    expect(results.posts.map(&:id)).to eq([post.id])

    # The filter keyword is matched regardless of letter case.
    results = Search.execute('title iN:tItLe')
    expect(results.posts.map(&:id)).to eq([post.id])

    # 'first' occurs only in a post body, not in the title.
    results = Search.execute('first in:title')
    expect(results.posts).to eq([])
  end

  it 'works irrespective of the order' do
    topic = Fabricate(:topic, title: " A topic about Discourse ")
    Fabricate(:post, topic: topic, raw: " This is another post ")
    topic2 = Fabricate(:topic, title: " This is another topic ")
    Fabricate(:post, topic: topic2, raw: " Discourse is awesome ")

    # The position of the filters relative to the term must not change the result.
    results = Search.execute('Discourse in:title status:open')
    expect(results.posts.length).to eq(1)

    results = Search.execute('in:title status:open Discourse')
    expect(results.posts.length).to eq(1)
  end
end
2018-10-23 09:10:33 +08:00
# With search_ignore_accents enabled, the accented word is found through both
# its accented and unaccented spelling; the Thai word stays searchable.
context 'ignore_diacritics' do
  before do
    SiteSetting.search_ignore_accents = true
  end

  let!(:post1) do
    Fabricate(:post, raw: 'สวัสดี Rágis hello')
  end

  it('allows strips correctly') do
    topic_search = ->(term) { Search.execute(term, type_filter: 'topic') }

    expect(topic_search.call('hello').posts.length).to eq(1)
    expect(topic_search.call('ragis').posts.length).to eq(1)

    accented_results = topic_search.call('Rágis')
    expect(accented_results.posts.length).to eq(1)

    # TODO: this is a test we need to fix!
    #expect(accented_results.blurb(accented_results.posts.first)).to include('Rágis')

    expect(topic_search.call('สวัสดี').posts.length).to eq(1)
  end
end
# With search_ignore_accents disabled, only the accented spelling matches and
# the blurb keeps the accent.
# NOTE(review): the example description below says "strips", yet this context
# asserts that accents are NOT stripped ('regis' finds nothing) - it looks
# copied from the ignore_diacritics context; consider rewording.
context 'include_diacritics' do
  before do
    SiteSetting.search_ignore_accents = false
  end

  let!(:post1) do
    Fabricate(:post, raw: 'สวัสดี Régis hello')
  end

  it('allows strips correctly') do
    topic_search = ->(term) { Search.execute(term, type_filter: 'topic') }

    expect(topic_search.call('hello').posts.length).to eq(1)
    expect(topic_search.call('regis').posts.length).to eq(0)

    accented_results = topic_search.call('Régis')
    expect(accented_results.posts.length).to eq(1)
    expect(accented_results.blurb(accented_results.posts.first)).to include('Régis')

    expect(topic_search.call('สวัสดี').posts.length).to eq(1)
  end
end
2017-07-21 16:43:02 +08:00
# Five posts containing 'hello' five down to one time are paged two at a time
# (Search.per_filter is stubbed). The expectations rely on results arriving in
# post1..post5 order.
context 'pagination' do
  let(:number_of_results) { 2 }

  let!(:post1) do
    Fabricate(:post, raw: 'hello hello hello hello hello')
  end
  let!(:post2) do
    Fabricate(:post, raw: 'hello hello hello hello')
  end
  let!(:post3) do
    Fabricate(:post, raw: 'hello hello hello')
  end
  let!(:post4) do
    Fabricate(:post, raw: 'hello hello')
  end
  let!(:post5) do
    Fabricate(:post, raw: 'hello')
  end

  before { Search.stubs(:per_filter).returns(number_of_results) }

  it 'returns more results flag' do
    first_page = Search.execute('hello', type_filter: 'topic')
    second_page = Search.execute('hello', type_filter: 'topic', page: 2)

    expect(first_page.posts.length).to eq(number_of_results)
    expect(first_page.posts.map(&:id)).to eq([post1.id, post2.id])
    expect(first_page.more_full_page_results).to eq(true)

    # post5 is still left after the second page, so the flag stays set.
    expect(second_page.posts.length).to eq(number_of_results)
    expect(second_page.posts.map(&:id)).to eq([post3.id, post4.id])
    expect(second_page.more_full_page_results).to eq(true)
  end

  it 'correctly search with page parameter' do
    third_page_search = Search.new('hello', type_filter: 'topic', page: 3)
    third_page = third_page_search.execute

    # Page 3 skips the two preceding pages and holds the single remaining post.
    expect(third_page_search.offset).to eq(2 * number_of_results)
    expect(third_page.posts.length).to eq(1)
    expect(third_page.posts).to eq([post5])
    expect(third_page.more_full_page_results).to eq(nil)
  end
end
2019-06-28 16:19:57 +08:00
# `in:tagged` matches posts of topics carrying at least one tag; the keyword
# is matched regardless of letter case.
context 'in:tagged' do
  it 'allows for searching by presence of any tags' do
    tagged_topic = Fabricate(:topic, title: 'I am testing a tagged search')
    Fabricate(:post, topic: tagged_topic, raw: 'this is the first post')
    Fabricate(:topic_tag, topic: tagged_topic, tag: Fabricate(:tag))

    expected_counts = {
      'in:untagged' => 0,
      'in:tagged' => 1,
      'In:TaGgEd' => 1
    }

    expected_counts.each do |query, expected_count|
      expect(Search.execute(query).posts.length).to eq(expected_count)
    end
  end
end
# `in:untagged` matches posts of topics without any tag; the keyword is
# matched regardless of letter case.
context 'in:untagged' do
  it 'allows for searching by presence of no tags' do
    untagged_topic = Fabricate(:topic, title: 'I am testing a untagged search')
    Fabricate(:post, topic: untagged_topic, raw: 'this is the first post')

    expected_counts = {
      'iN:uNtAgGeD' => 1,
      'in:tagged' => 0
    }

    expected_counts.each do |query, expected_count|
      expect(Search.execute(query).posts.length).to eq(expected_count)
    end
  end
end
2020-08-07 10:47:00 +08:00
# Exercises the extension hooks Search.advanced_filter and Search.advanced_order
# with two posts that both contain the word "advanced"; post0 has the longer raw text.
# NOTE(review): no teardown of the registered filter/order is visible in this
# context - confirm whether the registrations persist into other examples.
context 'plugin extensions' do
let! ( :post0 ) { Fabricate ( :post , raw : 'this is the first post about advanced filter with length more than 50 chars' ) }
let! ( :post1 ) { Fabricate ( :post , raw : 'this is the second post about advanced filter' ) }
it 'allows to define custom filter' do
# Baseline before the filter is registered: both posts, post1 first.
expect ( Search . new ( " advanced " ) . execute . posts ) . to eq ( [ post1 , post0 ] )
# Registers a filter whose block receives the posts relation and the captured
# number, and keeps only posts whose raw length is at least that number.
# NOTE(review): the spaces inside the regex and string literals look like
# extraction artifacts - verify against the upstream file.
Search . advanced_filter ( / ^min_chars:( \ d+)$ / ) do | posts , match |
posts . where ( " (SELECT LENGTH(p2.raw) FROM posts p2 WHERE p2.id = posts.id) >= ? " , match . to_i )
end
# Only post0 is expected once the 50-character minimum is applied.
expect ( Search . new ( " advanced min_chars:50 " ) . execute . posts ) . to eq ( [ post0 ] )
end
it 'allows to define custom order' do
# Baseline order before the custom order is registered.
expect ( Search . new ( " advanced " ) . execute . posts ) . to eq ( [ post1 , post0 ] )
# Registers an order named :chars that sorts by raw length, longest first.
Search . advanced_order ( :chars ) do | posts |
posts . reorder ( " (SELECT LENGTH(raw) FROM posts WHERE posts.topic_id = subquery.topic_id) DESC " )
end
# `order:chars` is expected to put the longer post0 ahead of post1.
expect ( Search . new ( " advanced order:chars " ) . execute . posts ) . to eq ( [ post0 , post1 ] )
end
end
2013-02-06 03:16:51 +08:00
end