Drastically improve search performance

The previous approach of joining the posts table into the main search
query was not scaling well. Searches on discuss.flarum.org were taking
~1.5 seconds which – a significant improvement over the pre-beta 8
search, but still not acceptable.

This new approach uses a much more efficient subquery join. Searches
on discuss.flarum.org now take mere milliseconds. The search result
ranking strategy has been further refined as well so that discussions
are ranked by the collective relevance of their posts.
This commit is contained in:
Toby Zerner 2018-11-14 11:19:39 +10:30
parent c0286bb4a3
commit 309bb21ffe

View File

@ -12,10 +12,10 @@
namespace Flarum\Discussion\Search\Gambit;
use Flarum\Discussion\Search\DiscussionSearch;
use Flarum\Event\ScopeModelVisibility;
use Flarum\Post\Post;
use Flarum\Search\AbstractSearch;
use Flarum\Search\GambitInterface;
use Illuminate\Database\Query\Expression;
use LogicException;
class FulltextGambit implements GambitInterface
@ -36,26 +36,35 @@ class FulltextGambit implements GambitInterface
$query = $search->getQuery();
$grammar = $query->getGrammar();
$query
->selectRaw('SUBSTRING_INDEX(GROUP_CONCAT('.$grammar->wrap('posts.id').' ORDER BY MATCH('.$grammar->wrap('posts.content').') AGAINST (?) DESC), \',\', 1) as most_relevant_post_id', [$bit])
->leftJoin('posts', 'posts.discussion_id', '=', 'discussions.id')
// Construct a subquery to fetch discussions which contain relevant
// posts. Retrieve the collective relevance of each discussion's posts,
// which we will use later in the order by clause, and also retrieve
// the ID of the most relevant post.
$subquery = Post::whereVisibleTo($search->getActor())
->select('posts.discussion_id')
->selectRaw('SUM(MATCH('.$grammar->wrap('posts.content').') AGAINST (?)) as score', [$bit])
->selectRaw('SUBSTRING_INDEX(GROUP_CONCAT('.$grammar->wrap('posts.id').' ORDER BY MATCH('.$grammar->wrap('posts.content').') AGAINST (?) DESC, '.$grammar->wrap('posts.number').'), \',\', 1) as most_relevant_post_id', [$bit])
->where('posts.type', 'comment')
->where(function ($query) use ($search) {
event(new ScopeModelVisibility(Post::query()->setQuery($query), $search->getActor(), 'view'));
})
->where(function ($query) use ($bit) {
$grammar = $query->getGrammar();
$query->whereRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (? IN BOOLEAN MODE)', [$bit])
->orWhereRaw('MATCH('.$grammar->wrap('posts.content').') AGAINST (? IN BOOLEAN MODE)', [$bit]);
})
->whereRaw('MATCH('.$grammar->wrap('posts.content').') AGAINST (? IN BOOLEAN MODE)', [$bit])
->groupBy('posts.discussion_id');
$search->setDefaultSort(function ($query) use ($bit) {
$grammar = $query->getGrammar();
// Join the subquery into the main search query and scope results to
// discussions that have a relevant title or that contain relevant posts.
$query
->addSelect('posts_ft.most_relevant_post_id')
->join(
new Expression('('.$subquery->toSql().') '.$grammar->wrap('posts_ft')),
'posts_ft.discussion_id', '=', 'discussions.id'
)
->addBinding($subquery->getBindings(), 'join')
->where(function ($query) use ($grammar, $bit) {
$query->whereRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (? IN BOOLEAN MODE)', [$bit])
->orWhereNotNull('posts_ft.score');
});
$search->setDefaultSort(function ($query) use ($grammar, $bit) {
$query->orderByRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (?) desc', [$bit]);
$query->orderByRaw('MATCH('.$grammar->wrap('posts.content').') AGAINST (?) desc', [$bit]);
$query->orderBy('posts_ft.score', 'desc');
});
}
}