From 1f2411e15e46fbfb7a499a69a869663c349cca91 Mon Sep 17 00:00:00 2001 From: Alexander Skvortsov <38059171+askvortsov1@users.noreply.github.com> Date: Tue, 20 Apr 2021 18:52:14 -0400 Subject: [PATCH] Fix searching titles in discussions (#2698) * Fix searching titles in discussions * Apply fixes from StyleCI * Fix tests * Distinct by discussion ID * Replace distinct with groupBy Co-authored-by: Alexander Skvortsov --- .../Search/Gambit/FulltextGambit.php | 16 ++- .../api/discussions/FulltextSearchTest.php | 125 ++++++++++++++++++ 2 files changed, 135 insertions(+), 6 deletions(-) create mode 100644 tests/integration/api/discussions/FulltextSearchTest.php diff --git a/src/Discussion/Search/Gambit/FulltextGambit.php b/src/Discussion/Search/Gambit/FulltextGambit.php index f57e13c76..2c1798eb0 100644 --- a/src/Discussion/Search/Gambit/FulltextGambit.php +++ b/src/Discussion/Search/Gambit/FulltextGambit.php @@ -9,6 +9,7 @@ namespace Flarum\Discussion\Search\Gambit; +use Flarum\Discussion\Discussion; use Flarum\Post\Post; use Flarum\Search\GambitInterface; use Flarum\Search\SearchState; @@ -29,6 +30,11 @@ class FulltextGambit implements GambitInterface $query = $search->getQuery(); $grammar = $query->getGrammar(); + $discussionSubquery = Discussion::select('id') + ->selectRaw('NULL as score') + ->selectRaw('first_post_id as most_relevant_post_id') + ->whereRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (? IN BOOLEAN MODE)', [$bit]); + // Construct a subquery to fetch discussions which contain relevant // posts. Retrieve the collective relevance of each discussion's posts, // which we will use later in the order by clause, and also retrieve @@ -39,7 +45,8 @@ class FulltextGambit implements GambitInterface ->selectRaw('SUBSTRING_INDEX(GROUP_CONCAT('.$grammar->wrap('posts.id').' ORDER BY MATCH('.$grammar->wrap('posts.content').') AGAINST (?) DESC, '.$grammar->wrap('posts.number').'), \',\', 1) as most_relevant_post_id', [$bit]) ->where('posts.type', 'comment') ->whereRaw('MATCH('.$grammar->wrap('posts.content').') AGAINST (? IN BOOLEAN MODE)', [$bit]) - ->groupBy('posts.discussion_id'); + ->groupBy('posts.discussion_id') + ->union($discussionSubquery); // Join the subquery into the main search query and scope results to // discussions that have a relevant title or that contain relevant posts. @@ -51,11 +58,8 @@ class FulltextGambit implements GambitInterface '=', 'discussions.id' ) - ->addBinding($subquery->getBindings(), 'join') - ->where(function ($query) use ($grammar, $bit) { - $query->whereRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (? IN BOOLEAN MODE)', [$bit]) - ->orWhereNotNull('posts_ft.score'); - }); + ->groupBy('discussions.id') + ->addBinding($subquery->getBindings(), 'join'); $search->setDefaultSort(function ($query) use ($grammar, $bit) { $query->orderByRaw('MATCH('.$grammar->wrap('discussions.title').') AGAINST (?) desc', [$bit]); diff --git a/tests/integration/api/discussions/FulltextSearchTest.php b/tests/integration/api/discussions/FulltextSearchTest.php new file mode 100644 index 000000000..ca2de82ab --- /dev/null +++ b/tests/integration/api/discussions/FulltextSearchTest.php @@ -0,0 +1,125 @@ +database()->rollBack(); + + // We need to insert these outside of a transaction, because FULLTEXT indexing, + // which is needed for search, doesn't happen in transactions. + // We clean it up explcitly at the end. + $this->database()->table('discussions')->insert([ + ['id' => 1, 'title' => 'lightsail in title', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1], + ['id' => 2, 'title' => 'not in title', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1], + ['id' => 3, 'title' => 'not in title', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1], + ]); + + $this->database()->table('posts')->insert([ + ['id' => 1, 'discussion_id' => 1, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '

not in text

'], + ['id' => 2, 'discussion_id' => 2, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '

lightsail in text

'], + ['id' => 3, 'discussion_id' => 3, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '

not in text

'], + ]); + + // We need to call these again, since we rolled back the transaction started by `::app()`. + $this->database()->beginTransaction(); + + $this->populateDatabase(); + } + + /** + * @inheritDoc + */ + protected function tearDown(): void + { + parent::tearDown(); + + $this->database()->table('discussions')->whereIn('id', [1, 2, 3])->delete(); + $this->database()->table('posts')->whereIn('id', [1, 2, 3])->delete(); + } + + /** + * @test + */ + public function can_search_for_word_or_title_in_post() + { + $response = $this->send( + $this->request('GET', '/api/discussions') + ->withQueryParams([ + 'filter' => ['q' => 'lightsail'], + 'include' => 'mostRelevantPost', + ]) + ); + + $data = json_decode($response->getBody()->getContents(), true); + + $this->assertEqualsCanonicalizing(['1', '2'], Arr::pluck($data['data'], 'id'), 'IDs do not match'); + } + + /** + * @test + */ + public function ignores_non_word_characters_when_searching() + { + $response = $this->send( + $this->request('GET', '/api/discussions') + ->withQueryParams([ + 'filter' => ['q' => 'lightsail+'], + 'include' => 'mostRelevantPost', + ]) + ); + + $data = json_decode($response->getBody()->getContents(), true); + + $this->assertEqualsCanonicalizing(['1', '2'], Arr::pluck($data['data'], 'id'), 'IDs do not match'); + } + + /** + * @test + */ + public function search_for_special_characters_gives_empty_result() + { + $response = $this->send( + $this->request('GET', '/api/discussions') + ->withQueryParams([ + 'filter' => ['q' => '*'], + 'include' => 'mostRelevantPost', + ]) + ); + + $data = json_decode($response->getBody()->getContents(), true); + $this->assertEquals([], $data['data']); + + $response = $this->send( + $this->request('GET', '/api/discussions') + ->withQueryParams([ + 'filter' => ['q' => '@'], + 'include' => 'mostRelevantPost', + ]) + ); + + $data = json_decode($response->getBody()->getContents(), true); + $this->assertEquals([], $data['data']); + } +}