2018-09-25 19:30:50 +08:00
< ? php namespace BookStack\Entities ;
use BookStack\Auth\Permissions\PermissionService ;
2017-03-19 20:48:44 +08:00
use Illuminate\Database\Connection ;
2018-09-26 01:00:40 +08:00
use Illuminate\Database\Eloquent\Builder as EloquentBuilder ;
2017-03-27 18:57:33 +08:00
use Illuminate\Database\Query\Builder ;
2017-03-19 20:48:44 +08:00
use Illuminate\Database\Query\JoinClause ;
2017-04-16 02:16:07 +08:00
use Illuminate\Support\Collection ;
2019-09-14 06:58:40 +08:00
use Illuminate\Support\Str ;
2017-03-19 20:48:44 +08:00
class SearchService
{
2018-09-26 01:00:40 +08:00
/**
 * Model used to write to and truncate the stored search terms.
 *
 * @var SearchTerm
 */
protected $searchTerm;

/**
 * Provider used to list the searchable entity types and fetch a model by type name.
 *
 * @var EntityProvider
 */
protected $entityProvider;

/**
 * Database connection used for the raw search-term and comment sub-queries.
 *
 * @var Connection
 */
protected $db;

/**
 * Service that applies the entity permission restrictions to a built query.
 *
 * @var PermissionService
 */
protected $permissionService;

/**
 * Acceptable operators to be used in a query.
 * Multi-character operators are listed before their single-character
 * prefixes since this order is reused when building the operator regex.
 *
 * @var array
 */
protected $queryOperators = ['<=', '>=', '=', '<', '>', 'like', '!='];
2017-03-19 20:48:44 +08:00
/**
 * SearchService constructor.
 * Stores the injected collaborators on the instance for later use.
 */
public function __construct(
    SearchTerm $searchTerm,
    EntityProvider $entityProvider,
    Connection $db,
    PermissionService $permissionService
) {
    $this->permissionService = $permissionService;
    $this->db = $db;
    $this->entityProvider = $entityProvider;
    $this->searchTerm = $searchTerm;
}
2017-04-30 18:38:58 +08:00
/**
 * Replace the database connection used by this service.
 *
 * @param Connection $connection Connection to use for subsequent queries.
 */
public function setConnection(Connection $connection)
{
    $this->db = $connection;
}
2017-03-27 18:57:33 +08:00
/**
 * Search all entities in the system.
 * The provided count is applied per entity type searched, so the total
 * number of results returned can be larger than $count and is not guaranteed.
 *
 * @param SearchOptions $searchOpts Parsed search options (terms, exacts, tags, filters).
 * @param string        $entityType A single entity type to search, or 'all'.
 * @param int           $page       1-based page number.
 * @param int           $count      Page size, per entity type.
 * @param string        $action     Permission action the results must be allowed for.
 *
 * @return array{total: int, count: int, has_more: bool, results: Collection}
 */
public function searchEntities(SearchOptions $searchOpts, string $entityType = 'all', int $page = 1, int $count = 20, string $action = 'view'): array
{
    $entityTypes = array_keys($this->entityProvider->all());
    $entityTypesToSearch = $entityTypes;

    if ($entityType !== 'all') {
        // Must be wrapped in an array: the loop below iterates this value,
        // and iterating a bare string would search nothing at all.
        $entityTypesToSearch = [$entityType];
    } elseif (isset($searchOpts->filters['type'])) {
        $entityTypesToSearch = explode('|', $searchOpts->filters['type']);
    }

    $results = collect();
    $total = 0;
    $hasMore = false;

    foreach ($entityTypesToSearch as $typeToSearch) {
        // Silently ignore any requested type that is not a known entity type.
        if (!in_array($typeToSearch, $entityTypes, true)) {
            continue;
        }

        $search = $this->searchEntityTable($searchOpts, $typeToSearch, $page, $count, $action);
        $entityTotal = $this->searchEntityTable($searchOpts, $typeToSearch, $page, $count, $action, true);

        // More pages exist if any single type has matches beyond the current page.
        if ($entityTotal > $page * $count) {
            $hasMore = true;
        }

        $total += $entityTotal;
        $results = $results->merge($search);
    }

    return [
        'total'    => $total,
        'count'    => count($results),
        'has_more' => $hasMore,
        'results'  => $results->sortByDesc('score')->values(),
    ];
}
2017-04-16 02:16:07 +08:00
/**
 * Search within a single book for matching pages and chapters.
 * Only the 'page' and 'chapter' types are searched; a 'type' filter in the
 * search string can narrow this further. At most 20 results are returned,
 * ordered by descending score.
 */
public function searchBook(int $bookId, string $searchString): Collection
{
    $options = SearchOptions::fromString($searchString);
    $allowedTypes = ['page', 'chapter'];

    $requestedTypes = $allowedTypes;
    if (isset($options->filters['type'])) {
        $requestedTypes = explode('|', $options->filters['type']);
    }

    $matches = collect();
    foreach ($requestedTypes as $type) {
        if (in_array($type, $allowedTypes)) {
            $typeQuery = $this->buildEntitySearchQuery($options, $type);
            $typeMatches = $typeQuery->where('book_id', '=', $bookId)->take(20)->get();
            $matches = $matches->merge($typeMatches);
        }
    }

    return $matches->sortByDesc('score')->take(20);
}
/**
 * Search within a single chapter for matching pages.
 * At most 20 pages are fetched, returned in descending score order.
 */
public function searchChapter(int $chapterId, string $searchString): Collection
{
    $options = SearchOptions::fromString($searchString);

    $pageQuery = $this->buildEntitySearchQuery($options, 'page');
    $matchedPages = $pageQuery->where('chapter_id', '=', $chapterId)->take(20)->get();

    return $matchedPages->sortByDesc('score');
}
2017-03-27 18:57:33 +08:00
/**
 * Search across a particular entity type.
 * When $getCount is true the total number of matches is returned
 * instead of the page of matching items.
 *
 * @return \Illuminate\Database\Eloquent\Collection|int|static[]
 */
public function searchEntityTable(SearchOptions $searchOpts, string $entityType = 'page', int $page = 1, int $count = 20, string $action = 'view', bool $getCount = false)
{
    $query = $this->buildEntitySearchQuery($searchOpts, $entityType, $action);

    if (!$getCount) {
        // Pages are 1-based, so page 1 starts at offset 0.
        $offset = ($page - 1) * $count;

        return $query->skip($offset)->take($count)->get();
    }

    return $query->count();
}
/**
 * Create a search query for an entity type.
 * Applies, in order: scored term matching, exact phrase matching, tag
 * searches and named filters, then hands the query to the permission
 * service so that restrictions for the given action are enforced.
 */
protected function buildEntitySearchQuery(SearchOptions $searchOpts, string $entityType = 'page', string $action = 'view'): EloquentBuilder
{
    $entity = $this->entityProvider->get($entityType);
    $entitySelect = $entity->newQuery();

    // Handle normal search terms
    $searchTerms = $searchOpts->searches;
    if (count($searchTerms) > 0) {
        // Sub-query summing the score of every stored term that starts
        // with one of the searched terms, grouped per entity.
        $scoreQuery = $this->db->table('search_terms')
            ->select('entity_id', 'entity_type', \DB::raw('SUM(score) as score'));
        $scoreQuery->where('entity_type', '=', $entity->getMorphClass());
        $scoreQuery->where(function (Builder $termQuery) use ($searchTerms) {
            foreach ($searchTerms as $searchTerm) {
                $termQuery->orWhere('term', 'like', $searchTerm . '%');
            }
        });
        $scoreQuery->groupBy('entity_type', 'entity_id');

        $scoreTable = \DB::raw('(' . $scoreQuery->toSql() . ') as s');
        $entitySelect->join($scoreTable, function (JoinClause $join) {
            $join->on('id', '=', 'entity_id');
        });
        $entitySelect->selectRaw($entity->getTable() . '.*, s.score');
        $entitySelect->orderBy('score', 'desc');

        // The sub-query was embedded as raw SQL, so its bindings have to be
        // carried over before any further conditions are added.
        $entitySelect->mergeBindings($scoreQuery);
    }

    // Handle exact term matching
    $exactTerms = $searchOpts->exacts;
    if (count($exactTerms) > 0) {
        $entitySelect->where(function (EloquentBuilder $exactQuery) use ($exactTerms, $entity) {
            foreach ($exactTerms as $exactTerm) {
                $likePattern = '%' . $exactTerm . '%';
                // Each exact term must appear in either the name or the text field.
                $exactQuery->where(function (EloquentBuilder $fieldQuery) use ($likePattern, $entity) {
                    $fieldQuery->where('name', 'like', $likePattern)
                        ->orWhere($entity->textField, 'like', $likePattern);
                });
            }
        });
    }

    // Handle tag searches
    foreach ($searchOpts->tags as $tagTerm) {
        $this->applyTagSearch($entitySelect, $tagTerm);
    }

    // Handle filters: a filter named e.g. "updated_after" maps to filterUpdatedAfter().
    // Filters without a matching method are ignored.
    foreach ($searchOpts->filters as $filterName => $filterValue) {
        $filterMethod = Str::camel('filter_' . $filterName);
        if (!method_exists($this, $filterMethod)) {
            continue;
        }
        $this->$filterMethod($entitySelect, $entity, $filterValue);
    }

    return $this->permissionService->enforceEntityRestrictions($entityType, $entitySelect, $action);
}
2017-03-28 01:05:34 +08:00
/**
 * Get the available query operators as a regex-escaped alternation,
 * in the same order as they are declared in $queryOperators.
 */
protected function getRegexEscapedOperators(): string
{
    $escaped = array_map(function ($operator) {
        return preg_quote($operator);
    }, $this->queryOperators);

    return implode('|', $escaped);
}
/**
 * Apply a tag search term onto an entity query.
 *
 * The term is split into "<name><operator><value>". When a valid operator
 * and a value are both present the tag value is compared (numerically when
 * the value is numeric and the operator is not 'like'); otherwise only the
 * tag name is matched.
 *
 * @param EloquentBuilder $query   Entity query to constrain.
 * @param string          $tagTerm Raw tag term, e.g. "rating>=3" or "topic".
 *
 * @return EloquentBuilder The same query instance, for chaining.
 */
protected function applyTagSearch(EloquentBuilder $query, string $tagTerm): EloquentBuilder
{
    $pattern = '/^(.*?)((' . $this->getRegexEscapedOperators() . ')(.*?))?$/';
    if (!preg_match($pattern, $tagTerm, $tagSplit)) {
        // The pattern can fail to match (e.g. a term containing line breaks);
        // fall back to treating the whole term as a tag name.
        $tagSplit = [$tagTerm, $tagTerm];
    }

    $tagName = $tagSplit[1];
    $tagOperator = $tagSplit[3] ?? '';
    $tagValue = $tagSplit[4] ?? '';

    // Compare against '' rather than using empty(), since empty('0') is true
    // and would otherwise discard a legitimate value or name of "0".
    $hasComparison = $tagOperator !== ''
        && $tagValue !== ''
        && in_array($tagOperator, $this->queryOperators, true);

    $query->whereHas('tags', function (EloquentBuilder $tagQuery) use ($tagName, $tagOperator, $tagValue, $hasComparison) {
        if (!$hasComparison) {
            $tagQuery->where('name', '=', $tagName);
            return;
        }

        if ($tagName !== '') {
            $tagQuery->where('name', '=', $tagName);
        }

        $numericValue = is_numeric($tagValue) ? (float) $tagValue : null;
        if ($numericValue !== null && is_finite($numericValue) && $tagOperator !== 'like') {
            // A raw condition is needed here: a bound value would be sent as a
            // string and compared as text, preventing number-based comparison.
            // The operator comes from the fixed whitelist and the value has been
            // cast to a finite float, so neither can carry arbitrary SQL.
            $tagQuery->whereRaw("value {$tagOperator} {$numericValue}");
        } else {
            $tagQuery->where('value', $tagOperator, $tagValue);
        }
    });

    return $query;
}
2017-03-19 20:48:44 +08:00
/**
 * Index the given entity.
 * Any existing search terms for the entity are removed first, then the
 * terms are regenerated and stored through indexEntities() so that the
 * inserts are chunked rather than sent as one unbounded statement.
 *
 * @param Entity $entity Entity to (re-)index.
 */
public function indexEntity(Entity $entity)
{
    $this->deleteEntityTerms($entity);
    $this->indexEntities([$entity]);
}
/**
 * Index multiple entities at once.
 * Name terms are weighted five times the body terms, both scaled by the
 * entity's searchFactor. Terms are inserted in chunks of 500 rows.
 *
 * @param \BookStack\Entities\Entity[] $entities
 */
protected function indexEntities($entities)
{
    $allTerms = [];

    foreach ($entities as $entity) {
        $entityTerms = array_merge(
            $this->generateTermArrayFromText($entity->name, 5 * $entity->searchFactor),
            $this->generateTermArrayFromText($entity->getText(), 1 * $entity->searchFactor)
        );

        foreach ($entityTerms as $entityTerm) {
            $allTerms[] = array_merge($entityTerm, [
                'entity_id'   => $entity->id,
                'entity_type' => $entity->getMorphClass(),
            ]);
        }
    }

    foreach (array_chunk($allTerms, 500) as $termChunk) {
        $this->searchTerm->newQuery()->insert($termChunk);
    }
}
/**
 * Delete and re-index the terms for all entities in the system.
 * Every entity type is read in chunks of 1000 rows, loading only the
 * id, name and text field of each entity.
 */
public function indexAllEntities()
{
    $this->searchTerm->truncate();

    $indexChunk = function ($entities) {
        $this->indexEntities($entities);
    };

    foreach ($this->entityProvider->all() as $entityModel) {
        $columns = ['id', 'name', $entityModel->textField];
        $entityModel->newQuery()->select($columns)->chunk(1000, $indexChunk);
    }
}
/**
 * Delete the search terms related to the given entity.
 *
 * @param Entity $entity
 */
public function deleteEntityTerms(Entity $entity)
{
    $termsRelation = $entity->searchTerms();
    $termsRelation->delete();
}
/**
 * Create a scored term array from the given text.
 * The text is split on whitespace and common punctuation; each distinct
 * token becomes one term whose score is its occurrence count multiplied
 * by the score adjustment.
 *
 * @param string|null $text            Text to tokenise; null is treated as empty.
 * @param float|int   $scoreAdjustment Multiplier applied to each occurrence count.
 *
 * @return array List of ['term' => string, 'score' => float|int] rows,
 *               in order of first occurrence.
 */
protected function generateTermArrayFromText($text, $scoreAdjustment = 1)
{
    $splitChars = " \n\t.,!?:;()[]{}<>`'\"";
    $tokenCounts = []; // {TextToken => OccurrenceCount}

    // Cast so that a null name/body is tokenised as an empty string.
    $token = strtok((string) $text, $splitChars);
    while ($token !== false) {
        $tokenCounts[$token] = ($tokenCounts[$token] ?? 0) + 1;
        $token = strtok($splitChars);
    }

    $terms = [];
    foreach ($tokenCounts as $token => $count) {
        $terms[] = [
            // Numeric tokens come back from the array as int keys; keep terms as strings.
            'term'  => (string) $token,
            'score' => $count * $scoreAdjustment,
        ];
    }

    return $terms;
}
2017-03-28 01:05:34 +08:00
/**
 * Custom entity search filters
 */

/**
 * Limit results to entities updated at or after the given date.
 * Input that cannot be parsed as a date leaves the query untouched.
 *
 * @param EloquentBuilder $query
 * @param Entity          $model
 * @param string          $input Date/time string understood by date_create().
 */
protected function filterUpdatedAfter(EloquentBuilder $query, Entity $model, $input)
{
    // date_create() reports unparseable input by returning false, not by throwing.
    $date = date_create($input);
    if ($date === false) {
        return;
    }

    $query->where('updated_at', '>=', $date);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities updated before the given date.
 * Input that cannot be parsed as a date leaves the query untouched.
 *
 * @param EloquentBuilder $query
 * @param Entity          $model
 * @param string          $input Date/time string understood by date_create().
 */
protected function filterUpdatedBefore(EloquentBuilder $query, Entity $model, $input)
{
    // date_create() reports unparseable input by returning false, not by throwing.
    $date = date_create($input);
    if ($date === false) {
        return;
    }

    $query->where('updated_at', '<', $date);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities created at or after the given date.
 * Input that cannot be parsed as a date leaves the query untouched.
 *
 * @param EloquentBuilder $query
 * @param Entity          $model
 * @param string          $input Date/time string understood by date_create().
 */
protected function filterCreatedAfter(EloquentBuilder $query, Entity $model, $input)
{
    // date_create() reports unparseable input by returning false, not by throwing.
    $date = date_create($input);
    if ($date === false) {
        return;
    }

    $query->where('created_at', '>=', $date);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities created before the given date.
 * Input that cannot be parsed as a date leaves the query untouched.
 *
 * @param EloquentBuilder $query
 * @param Entity          $model
 * @param string          $input Date/time string understood by date_create().
 */
protected function filterCreatedBefore(EloquentBuilder $query, Entity $model, $input)
{
    // date_create() reports unparseable input by returning false, not by throwing.
    $date = date_create($input);
    if ($date === false) {
        return;
    }

    $query->where('created_at', '<', $date);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities created by a given user.
 * Accepts a numeric user id or the literal 'me' for the current user;
 * any other input leaves the query untouched.
 */
protected function filterCreatedBy(EloquentBuilder $query, Entity $model, $input)
{
    if ($input === 'me') {
        $userId = user()->id;
    } elseif (is_numeric($input)) {
        $userId = $input;
    } else {
        return;
    }

    $query->where('created_by', '=', $userId);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities last updated by a given user.
 * Accepts a numeric user id or the literal 'me' for the current user;
 * any other input leaves the query untouched.
 */
protected function filterUpdatedBy(EloquentBuilder $query, Entity $model, $input)
{
    if ($input === 'me') {
        $userId = user()->id;
    } elseif (is_numeric($input)) {
        $userId = $input;
    } else {
        return;
    }

    $query->where('updated_by', '=', $userId);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities whose name contains the given text.
 */
protected function filterInName(EloquentBuilder $query, Entity $model, $input)
{
    $containsInput = '%' . $input . '%';
    $query->where('name', 'like', $containsInput);
}
2018-09-26 01:00:40 +08:00
/**
 * Alias of the "in name" filter: limits results to entities whose
 * name contains the given text.
 */
protected function filterInTitle(EloquentBuilder $query, Entity $model, $input)
{
    $this->filterInName($query, $model, $input);
}
2017-03-28 01:05:34 +08:00
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities whose text field (as named by the model's
 * textField property) contains the given text.
 */
protected function filterInBody(EloquentBuilder $query, Entity $model, $input)
{
    $containsInput = '%' . $input . '%';
    $query->where($model->textField, 'like', $containsInput);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities flagged as restricted.
 * The filter value itself is not used.
 */
protected function filterIsRestricted(EloquentBuilder $query, Entity $model, $input)
{
    $mustBeRestricted = true;
    $query->where('restricted', '=', $mustBeRestricted);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities that have a view recorded for the current user.
 * The filter value itself is not used.
 */
protected function filterViewedByMe(EloquentBuilder $query, Entity $model, $input)
{
    $ownViewsOnly = function ($viewQuery) {
        $viewQuery->where('user_id', '=', user()->id);
    };

    $query->whereHas('views', $ownViewsOnly);
}
2018-09-26 01:00:40 +08:00
/**
 * Limit results to entities that have no view recorded for the current user.
 * The filter value itself is not used.
 */
protected function filterNotViewedByMe(EloquentBuilder $query, Entity $model, $input)
{
    $ownViewsOnly = function ($viewQuery) {
        $viewQuery->where('user_id', '=', user()->id);
    };

    $query->whereDoesntHave('views', $ownViewsOnly);
}
2018-09-26 01:00:40 +08:00
/**
 * Apply a named sort option to the query.
 * An input such as "last_commented" is dispatched to sortByLastCommented();
 * inputs without a matching sortBy* method are ignored.
 */
protected function filterSortBy(EloquentBuilder $query, Entity $model, $input)
{
    $sortMethod = Str::camel('sort_by_' . $input);
    if (!method_exists($this, $sortMethod)) {
        return;
    }

    $this->$sortMethod($query, $model);
}
/**
 * Sorting filter options
 */

/**
 * Order results by the date of their most recent comment, newest first.
 * Joins a derived table holding, per entity of the model's morph class,
 * the comment row that has no later comment on the same entity. As this
 * is an inner join, entities without comments are excluded.
 */
protected function sortByLastCommented(EloquentBuilder $query, Entity $model)
{
    $table = $this->db->getTablePrefix() . 'comments';
    // Backslashes in the class name are doubled for use inside the SQL string literal.
    $escapedMorphClass = str_replace('\\', '\\\\', $model->getMorphClass());

    $latestCommentSql = '(SELECT c1.entity_id, c1.entity_type, c1.created_at as last_commented FROM ' . $table
        . ' c1 LEFT JOIN ' . $table
        . ' c2 ON (c1.entity_id = c2.entity_id AND c1.entity_type = c2.entity_type AND c1.created_at < c2.created_at) WHERE c1.entity_type = \'' . $escapedMorphClass
        . '\' AND c2.created_at IS NULL) as comments';

    $entityIdColumn = $model->getTable() . '.id';
    $query->join($this->db->raw($latestCommentSql), $entityIdColumn, '=', 'comments.entity_id');
    $query->orderBy('last_commented', 'desc');
}
2018-01-29 00:58:52 +08:00
}