From 509af2463d94210fd500c10c651fa8f883e87fd8 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Wed, 11 Dec 2024 15:53:57 +0000 Subject: [PATCH] Search Index: Fixed SQL error when indexing large pages Due to hitting statement placeholder limits (typically 65k) when inserting index terms for single page. Added test to cover. Also added skipped tests for tests we don't always want to run. For #5322 --- app/Search/SearchIndex.php | 20 +++++++++++++++----- tests/Entity/EntitySearchTest.php | 20 ++++++++++++++++++++ tests/LanguageTest.php | 4 +++- 3 files changed, 38 insertions(+), 6 deletions(-) diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index d9fc4e7aa..c7d9d6502 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -30,7 +30,7 @@ class SearchIndex { $this->deleteEntityTerms($entity); $terms = $this->entityToTermDataArray($entity); - SearchTerm::query()->insert($terms); + $this->insertTerms($terms); } /** @@ -46,10 +46,7 @@ class SearchIndex array_push($terms, ...$entityTerms); } - $chunkedTerms = array_chunk($terms, 500); - foreach ($chunkedTerms as $termChunk) { - SearchTerm::query()->insert($termChunk); - } + $this->insertTerms($terms); } /** @@ -99,6 +96,19 @@ class SearchIndex $entity->searchTerms()->delete(); } + /** + * Insert the given terms into the database. + * Chunks through the given terms to remain within database limits. + * @param array[] $terms + */ + protected function insertTerms(array $terms): void + { + $chunkedTerms = array_chunk($terms, 500); + foreach ($chunkedTerms as $termChunk) { + SearchTerm::query()->insert($termChunk); + } + } + /** * Create a scored term array from the given text, where the keys are the terms * and the values are their scores. diff --git a/tests/Entity/EntitySearchTest.php b/tests/Entity/EntitySearchTest.php index cabf23bd3..5ace70e3a 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Entity/EntitySearchTest.php @@ -6,6 +6,7 @@ use BookStack\Activity\Models\Tag; use BookStack\Entities\Models\Book; use BookStack\Entities\Models\Bookshelf; use BookStack\Entities\Models\Chapter; +use Illuminate\Support\Str; use Tests\TestCase; class EntitySearchTest extends TestCase @@ -477,6 +478,25 @@ class EntitySearchTest extends TestCase $this->assertEquals(2, $scoreByTerm->get('TermG')); } + public function test_indexing_works_as_expected_for_page_with_lots_of_terms() + { + $this->markTestSkipped('Time consuming test'); + + $count = 100000; + $text = ''; + $chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_#'; + for ($i = 0; $i < $count; $i++) { + $text .= substr(str_shuffle($chars), 0, 5) . ' '; + } + + $page = $this->entities->newPage(['name' => 'Test page A', 'html' => '

' . $text . '

']); + + $termCount = $page->searchTerms()->count(); + + // Expect at least 90% unique rate + $this->assertGreaterThan($count * 0.9, $termCount); + } + public function test_name_and_content_terms_are_merged_to_single_score() { $page = $this->entities->newPage(['name' => 'TermA', 'html' => ' diff --git a/tests/LanguageTest.php b/tests/LanguageTest.php index abe06407e..28491c3af 100644 --- a/tests/LanguageTest.php +++ b/tests/LanguageTest.php @@ -27,8 +27,10 @@ class LanguageTest extends TestCase } // Not part of standard phpunit test runs since we sometimes expect non-added langs. - public function do_test_locales_all_have_language_dropdown_entry() + public function test_locales_all_have_language_dropdown_entry() { + $this->markTestSkipped('Only used when checking language inclusion'); + $dropdownLocales = array_keys(trans('settings.language_select', [], 'en')); sort($dropdownLocales); sort($this->langs);