fix: languages with combining characters cannot be searched

* test: searching other types of languages
* fix: languages with combining characters cannot be searched
* test: can search in discussion titles
This commit is contained in:
Sami Mazouz 2022-04-09 23:04:15 +01:00 committed by GitHub
parent b5e5ae8c4c
commit 6de1ea0194
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 52 additions and 3 deletions
framework/core
src/Discussion/Search/Gambit
tests/integration/api/discussions

@ -25,7 +25,7 @@ class FulltextGambit implements GambitInterface
// Replace all non-word characters with spaces.
// We do this to prevent MySQL fulltext search boolean mode from taking
// effect: https://dev.mysql.com/doc/refman/5.7/en/fulltext-boolean.html
$bit = preg_replace('/[^\p{L}\p{N}_]+/u', ' ', $bit);
$bit = preg_replace('/[^\p{L}\p{N}\p{M}_]+/u', ' ', $bit);
$query = $search->getQuery();
$grammar = $query->getGrammar();

@ -12,6 +12,7 @@ namespace Flarum\Tests\integration\api\discussions;
use Carbon\Carbon;
use Flarum\Testing\integration\RetrievesAuthorizedUsers;
use Flarum\Testing\integration\TestCase;
use Illuminate\Support\Arr;
class ListWithFulltextSearchTest extends TestCase
{
@ -34,6 +35,8 @@ class ListWithFulltextSearchTest extends TestCase
['id' => 2, 'title' => 'lightsail in title too', 'created_at' => Carbon::createFromDate(2020, 01, 01)->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1],
['id' => 3, 'title' => 'not in title either', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1],
['id' => 4, 'title' => 'not in title or text', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1],
['id' => 5, 'title' => 'తెలుగు', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1],
['id' => 6, 'title' => '支持中文吗', 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'comment_count' => 1],
]);
$this->database()->table('posts')->insert([
@ -42,6 +45,8 @@ class ListWithFulltextSearchTest extends TestCase
['id' => 3, 'discussion_id' => 2, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '<t><p>another lightsail for discussion 2!</p></t>'],
['id' => 4, 'discussion_id' => 3, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '<t><p>just one lightsail for discussion 3.</p></t>'],
['id' => 5, 'discussion_id' => 4, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '<t><p>not in title or text</p></t>'],
['id' => 6, 'discussion_id' => 4, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '<t><p>తెలుగు</p></t>'],
['id' => 7, 'discussion_id' => 2, 'created_at' => Carbon::now()->toDateTimeString(), 'user_id' => 1, 'type' => 'comment', 'content' => '<t><p>支持中文吗</p></t>'],
]);
// We need to call these again, since we rolled back the transaction started by `::app()`.
@ -79,7 +84,7 @@ class ListWithFulltextSearchTest extends TestCase
return $row['id'];
}, $data['data']);
$this->assertEquals(['2', '1', '3'], $ids, 'IDs do not match');
$this->assertEqualsCanonicalizing(['2', '1', '3'], $ids, 'IDs do not match');
}
/**
@ -100,7 +105,51 @@ class ListWithFulltextSearchTest extends TestCase
return $row['id'];
}, $data['data']);
$this->assertEquals(['2', '1', '3'], $ids, 'IDs do not match');
$this->assertEqualsCanonicalizing(['2', '1', '3'], $ids, 'IDs do not match');
}
/**
 * Full-text search with a Telugu query string.
 *
 * Requests the discussion list filtered by the Telugu term and asks for the
 * `mostRelevantPost` include. The fixtures contain the term in the title of
 * discussion 5 and in post 6, which belongs to discussion 4, so the result
 * must consist of discussions 4 and 5 (order is not significant) and the
 * only included resource must carry id 6.
 *
 * @test
 */
public function can_search_telugu_like_languages()
{
    $request = $this->request('GET', '/api/discussions')->withQueryParams([
        'filter' => ['q' => 'తెలుగు'],
        'include' => 'mostRelevantPost',
    ]);
    $reply = $this->send($request);

    $payload = json_decode($reply->getBody()->getContents(), true);

    // Collect the id of every discussion resource returned by the API.
    $discussionIds = [];
    foreach ($payload['data'] as $resource) {
        $discussionIds[] = $resource['id'];
    }

    // Canonicalizing comparison: only the set of ids matters, not the ranking.
    $this->assertEqualsCanonicalizing(['4', '5'], $discussionIds, 'IDs do not match');
    $this->assertEqualsCanonicalizing(['6'], Arr::pluck($payload['included'], 'id'));
}
/**
 * Full-text search with a Chinese (CJK) query string.
 *
 * Requests the discussion list filtered by the Chinese phrase and asks for
 * the `mostRelevantPost` include. The fixtures contain the phrase in the
 * title of discussion 6 and in post 7, which belongs to discussion 2, so the
 * result must consist of discussions 2 and 6 (order is not significant) and
 * the only included resource must carry id 7.
 *
 * @test
 */
public function can_search_cjk_languages()
{
    $request = $this->request('GET', '/api/discussions')->withQueryParams([
        'filter' => ['q' => '支持中文吗'],
        'include' => 'mostRelevantPost',
    ]);
    $reply = $this->send($request);

    $payload = json_decode($reply->getBody()->getContents(), true);

    // Collect the id of every discussion resource returned by the API.
    $discussionIds = [];
    foreach ($payload['data'] as $resource) {
        $discussionIds[] = $resource['id'];
    }

    // Canonicalizing comparison: only the set of ids matters, not the ranking.
    $this->assertEqualsCanonicalizing(['2', '6'], $discussionIds, 'IDs do not match');
    $this->assertEqualsCanonicalizing(['7'], Arr::pluck($payload['included'], 'id'));
}
/**