// Copyright 2024 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package bleve import ( "context" "code.gitea.io/gitea/modules/indexer/conversations/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" "github.com/blevesearch/bleve/v2/analysis/token/camelcase" "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search/query" ) const ( conversationIndexerAnalyzer = "conversationIndexer" conversationIndexerDocType = "conversationIndexerDocType" conversationIndexerLatestVersion = 4 ) const unicodeNormalizeName = "unicodeNormalize" func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { return m.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ "type": unicodenorm.Name, "form": unicodenorm.NFC, }) } const maxBatchSize = 16 // IndexerData an update to the conversation indexer type IndexerData internal.IndexerData // Type returns the document type, for bleve's mapping.Classifier interface. func (i *IndexerData) Type() string { return conversationIndexerDocType } // generateConversationIndexMapping generates the bleve index mapping for conversations func generateConversationIndexMapping() (mapping.IndexMapping, error) { mapping := bleve.NewIndexMapping() docMapping := bleve.NewDocumentMapping() numericFieldMapping := bleve.NewNumericFieldMapping() numericFieldMapping.Store = false numericFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping) textFieldMapping := bleve.NewTextFieldMapping() textFieldMapping.Store = false textFieldMapping.IncludeInAll = false boolFieldMapping := bleve.NewBooleanFieldMapping() boolFieldMapping.Store = false boolFieldMapping.IncludeInAll = false numberFieldMapping := bleve.NewNumericFieldMapping() numberFieldMapping.Store = false numberFieldMapping.IncludeInAll = false docMapping.AddFieldMappingsAt("is_public", boolFieldMapping) docMapping.AddFieldMappingsAt("title", textFieldMapping) docMapping.AddFieldMappingsAt("content", textFieldMapping) docMapping.AddFieldMappingsAt("comments", textFieldMapping) docMapping.AddFieldMappingsAt("is_pull", boolFieldMapping) docMapping.AddFieldMappingsAt("is_closed", boolFieldMapping) docMapping.AddFieldMappingsAt("label_ids", numberFieldMapping) docMapping.AddFieldMappingsAt("no_label", boolFieldMapping) docMapping.AddFieldMappingsAt("milestone_id", numberFieldMapping) docMapping.AddFieldMappingsAt("project_id", numberFieldMapping) docMapping.AddFieldMappingsAt("project_board_id", numberFieldMapping) docMapping.AddFieldMappingsAt("poster_id", numberFieldMapping) docMapping.AddFieldMappingsAt("assignee_id", numberFieldMapping) docMapping.AddFieldMappingsAt("mention_ids", numberFieldMapping) docMapping.AddFieldMappingsAt("reviewed_ids", numberFieldMapping) docMapping.AddFieldMappingsAt("review_requested_ids", numberFieldMapping) docMapping.AddFieldMappingsAt("subscriber_ids", numberFieldMapping) docMapping.AddFieldMappingsAt("updated_unix", numberFieldMapping) docMapping.AddFieldMappingsAt("created_unix", numberFieldMapping) docMapping.AddFieldMappingsAt("deadline_unix", numberFieldMapping) docMapping.AddFieldMappingsAt("comment_count", numberFieldMapping) if err := addUnicodeNormalizeTokenFilter(mapping); err != nil { return nil, err } else if err = mapping.AddCustomAnalyzer(conversationIndexerAnalyzer, map[string]any{ "type": custom.Name, "char_filters": []string{}, "tokenizer": unicode.Name, "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, }); err != nil { return nil, err } mapping.DefaultAnalyzer = conversationIndexerAnalyzer mapping.AddDocumentMapping(conversationIndexerDocType, docMapping) mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs return mapping, nil } var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { inner *inner_bleve.Indexer indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } // NewIndexer creates a new bleve local indexer func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, conversationIndexerLatestVersion, generateConversationIndexMapping) return &Indexer{ Indexer: inner, inner: inner, } } // Index will save the index data func (b *Indexer) Index(_ context.Context, conversations ...*internal.IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, conversation := range conversations { if err := batch.Index(indexer_internal.Base36(conversation.ID), (*IndexerData)(conversation)); err != nil { return err } } return batch.Flush() } // Delete deletes indexes by ids func (b *Indexer) Delete(_ context.Context, ids ...int64) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexer_internal.Base36(id)); err != nil { return err } } return batch.Flush() } // Search searches for conversations by given conditions. // Returns the matching conversation IDs func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (*internal.SearchResult, error) { var queries []query.Query if options.Keyword != "" { fuzziness := 0 if options.IsFuzzyKeyword { fuzziness = inner_bleve.GuessFuzzinessByKeyword(options.Keyword) } queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ inner_bleve.MatchPhraseQuery(options.Keyword, "title", conversationIndexerAnalyzer, fuzziness), inner_bleve.MatchPhraseQuery(options.Keyword, "content", conversationIndexerAnalyzer, fuzziness), inner_bleve.MatchPhraseQuery(options.Keyword, "comments", conversationIndexerAnalyzer, fuzziness), }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { var repoQueries []query.Query for _, repoID := range options.RepoIDs { repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "repo_id")) } if options.AllPublic { repoQueries = append(repoQueries, inner_bleve.BoolFieldQuery(true, "is_public")) } queries = append(queries, bleve.NewDisjunctionQuery(repoQueries...)) } if options.UpdatedAfterUnix.Has() || options.UpdatedBeforeUnix.Has() { queries = append(queries, inner_bleve.NumericRangeInclusiveQuery( options.UpdatedAfterUnix, options.UpdatedBeforeUnix, "updated_unix")) } var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) if len(queries) == 0 { indexerQuery = bleve.NewMatchAllQuery() } skip, limit := indexer_internal.ParsePaginator(options.Paginator) search := bleve.NewSearchRequestOptions(indexerQuery, limit, skip, false) if options.SortBy == "" { options.SortBy = internal.SortByCreatedAsc } search.SortBy([]string{string(options.SortBy), "-_id"}) result, err := b.inner.Indexer.SearchInContext(ctx, search) if err != nil { return nil, err } ret := &internal.SearchResult{ Total: int64(result.Total), Hits: make([]internal.Match, 0, len(result.Hits)), } for _, hit := range result.Hits { id, err := indexer_internal.ParseBase36(hit.ID) if err != nil { return nil, err } ret.Hits = append(ret.Hits, internal.Match{ ID: id, }) } return ret, nil }