Support repo code search without setting up an indexer ()

By relying on git's own `git grep` command, end users (especially those running
small instances) can benefit from the code search feature without having to
enable the indexer.

Fix 


![image](https://github.com/go-gitea/gitea/assets/2114189/11b7e458-88a4-480d-b4d7-72ee59406dd1)


![image](https://github.com/go-gitea/gitea/assets/2114189/0fe777d5-c95c-4288-a818-0427680805b6)

---------

Co-authored-by: silverwind <me@silverwind.io>
This commit is contained in:
wxiaoguang 2024-03-25 00:05:00 +08:00 committed by GitHub
parent 90a4f9a49e
commit 4734d43e14
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 253 additions and 65 deletions
docs/content
modules
options/locale
routers/web/repo
templates

@ -17,6 +17,12 @@ menu:
# Repository indexer # Repository indexer
## Builtin repository code search without indexer
Users can perform repository-level code search without setting up a repository indexer.
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
Better code search support can be achieved by setting up the repository indexer.
## Setting up the repository indexer ## Setting up the repository indexer
Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md): Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):

@ -87,6 +87,9 @@ _Symbols used in table:_
| Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | | Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ | | Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ |
- Gitea has builtin repository-level code search.
- Better code search support can be achieved by [using a repository indexer](administration/repo-indexer.md).
## Issue Tracker ## Issue Tracker
| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE | | Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |

@ -367,7 +367,6 @@ type RunStdError interface {
error error
Unwrap() error Unwrap() error
Stderr() string Stderr() string
IsExitCode(code int) bool
} }
type runStdError struct { type runStdError struct {
@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
return r.stderr return r.stderr
} }
func (r *runStdError) IsExitCode(code int) bool { func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError var exitError *exec.ExitError
if errors.As(r.err, &exitError) { if errors.As(err, &exitError) {
return exitError.ExitCode() == code return exitError.ExitCode() == code
} }
return false return false

@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error {
func configSet(key, value string) error { func configSet(key, value string) error {
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil) stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
if err != nil && !err.IsExitCode(1) { if err != nil && !IsErrorExitCode(err, 1) {
return fmt.Errorf("failed to get git config %s, err: %w", key, err) return fmt.Errorf("failed to get git config %s, err: %w", key, err)
} }
@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error {
// already exist // already exist
return nil return nil
} }
if err.IsExitCode(1) { if IsErrorExitCode(err, 1) {
// not exist, set new config // not exist, set new config
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil) _, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil { if err != nil {
@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error {
// already exist // already exist
return nil return nil
} }
if err.IsExitCode(1) { if IsErrorExitCode(err, 1) {
// not exist, add new config // not exist, add new config
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil) _, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil { if err != nil {
@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error {
} }
return nil return nil
} }
if err.IsExitCode(1) { if IsErrorExitCode(err, 1) {
// not exist // not exist
return nil return nil
} }

112
modules/git/grep.go Normal file

@ -0,0 +1,112 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
	"bufio"
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"

	"code.gitea.io/gitea/modules/util"
)
// GrepResult holds the lines that "git grep" reported for a single file.
// LineNumbers and LineCodes are filled in lockstep by GrepSearch, so the
// entries at the same index belong together.
type GrepResult struct {
	// Filename is the part of the heading line after the first ":" (the heading looks like "HEAD:path/to/file").
	Filename string
	// LineNumbers contains the number of every reported line (matches and context lines).
	LineNumbers []int
	// LineCodes contains the content of every reported line.
	LineCodes []string
}
// GrepOptions controls how GrepSearch runs "git grep".
type GrepOptions struct {
	// RefName is the ref to search in, "HEAD" is used when it is left empty.
	RefName string
	// ContextLineNumber is passed to "git grep" as the value of "--context".
	ContextLineNumber int
	// IsFuzzy makes GrepSearch split the search string on whitespace and pass every word as its own "-e" pattern.
	// When false, the whole search string is passed as one single pattern.
	IsFuzzy bool
}
// GrepSearch searches a repository by running "git grep" (fixed strings, case-insensitive)
// against opts.RefName, or against "HEAD" when no ref name is given.
//
// In fuzzy mode every whitespace-separated word of search is its own pattern, otherwise
// the whole search string is one pattern. Leading dashes are stripped from every pattern.
// If no non-empty pattern remains, nothing is executed and (nil, nil) is returned.
//
// One GrepResult is returned per file. Parsing stops once the block of the 50th file
// has been read, binary files are skipped, and over-long output lines are truncated.
// When "git grep" finds nothing, (nil, nil) is returned. Any other failure is returned
// as an error which includes the stderr output of git.
func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
	// Collect the patterns first. A pattern which is empty after stripping the dashes must not be
	// passed to git: an empty pattern matches every line, and passing no "-e" at all would make
	// git treat the ref name as the pattern.
	// NOTE(review): the dashes are presumably stripped so that a pattern can never look like an option — confirm.
	var patterns []string
	if opts.IsFuzzy {
		for _, word := range strings.Fields(search) {
			if pattern := strings.TrimLeft(word, "-"); pattern != "" {
				patterns = append(patterns, pattern)
			}
		}
	} else if pattern := strings.TrimLeft(search, "-"); pattern != "" {
		patterns = append(patterns, pattern)
	}
	if len(patterns) == 0 {
		return nil, nil
	}

	stdoutReader, stdoutWriter, err := os.Pipe()
	if err != nil {
		return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
	}
	defer func() {
		// both ends are usually closed by the pipeline function already, closing them again is harmless
		_ = stdoutReader.Close()
		_ = stdoutWriter.Close()
	}()

	/*
	 The output is like this ( "^@" means \x00):

	 HEAD:.air.toml
	 6^@bin = "gitea"

	 HEAD:.changelog.yml
	 2^@repo: go-gitea/gitea
	*/
	var results []*GrepResult
	// "-I" skips binary files: they would be reported as "Binary file <ref>:<path> matches",
	// which can't be told apart from a heading line and would produce a result with a wrong filename.
	cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name", "-I")
	cmd.AddOptionValues("--context", strconv.Itoa(opts.ContextLineNumber))
	for _, pattern := range patterns {
		cmd.AddOptionValues("-e", pattern)
	}
	cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))

	stderr := bytes.Buffer{}
	err = cmd.Run(&RunOpts{
		Dir:    repo.Path,
		Stdout: stdoutWriter,
		Stderr: &stderr,
		PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
			// the child process owns the write end now, our copy must be closed to be able to see EOF
			_ = stdoutWriter.Close()
			defer stdoutReader.Close()

			isInBlock := false
			rd := bufio.NewReaderSize(stdoutReader, bufio.MaxScanTokenSize)
			var res *GrepResult
			for {
				line, err := readGrepLine(rd)
				if err != nil {
					if errors.Is(err, io.EOF) {
						return nil
					}
					return err
				}
				if !isInBlock {
					// heading line of a new file block: "<ref>:<filename>"
					if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
						isInBlock = true
						res = &GrepResult{Filename: filename}
						results = append(results, res)
					}
					continue
				}
				if line == "" {
					// an empty line ends the block of the current file ("--break")
					if len(results) >= 50 {
						// enough results, stop the git process
						cancel()
						return nil
					}
					isInBlock = false
					continue
				}
				if line == "--" {
					// separator between two non-adjacent groups of lines of the same file
					continue
				}
				if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
					// a line number which can't be parsed is recorded as 0
					lineNumInt, _ := strconv.Atoi(lineNum)
					res.LineNumbers = append(res.LineNumbers, lineNumInt)
					res.LineCodes = append(res.LineCodes, lineCode)
				}
			}
		},
	})
	// git grep exits with 1 if no results are found
	if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
		return nil, nil
	}
	// a canceled context is expected when the search was stopped after enough results
	if err != nil && !errors.Is(err, context.Canceled) {
		return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
	}
	return results, nil
}

// readGrepLine reads one line (without its line ending) from rd.
// A line which doesn't fit into the buffer of rd is truncated to the buffer size and the
// rest of it is discarded, so a very long line (e.g. minified code) can't fail the search.
// It returns io.EOF when there is nothing left to read.
func readGrepLine(rd *bufio.Reader) (string, error) {
	lineBytes, isPrefix, err := rd.ReadLine()
	if err != nil {
		return "", err
	}
	// lineBytes is only valid until the next read, so it must be copied before the rest is drained
	line := string(lineBytes)
	for isPrefix {
		_, isPrefix, err = rd.ReadLine()
		if err != nil {
			if errors.Is(err, io.EOF) {
				// the output ended inside the long line, EOF is reported again by the next call
				break
			}
			return "", err
		}
	}
	return line, nil
}

41
modules/git/grep_test.go Normal file

@ -0,0 +1,41 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"context"
"path/filepath"
"testing"
"github.com/stretchr/testify/assert"
)
func TestGrepSearch(t *testing.T) {
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
assert.NoError(t, err)
defer repo.Close()
res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
}, res)
res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err)
assert.Len(t, res, 0)
res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
assert.Error(t, err)
assert.Len(t, res, 0)
}

@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil return nil
} }
// HighlightSearchResultCode highlights a code block and pairs every highlighted line
// with its line number: the i-th highlighted line gets lineNums[i].
// The result has as many entries as the shorter of the two (highlighted lines, lineNums).
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
	// The block is highlighted as a whole and split into lines afterwards,
	// highlighting line by line doesn't work well when highlighting spans multiple lines.
	highlighted, _ := highlight.Code(filename, "", code)
	htmlLines := strings.Split(string(highlighted), "\n")

	// The number of highlighted lines might differ from the number of line numbers,
	// because "highlight" removes the last `\n`, so only the common part is used.
	count := len(lineNums)
	if len(htmlLines) < count {
		count = len(htmlLines)
	}

	resultLines := make([]ResultLine, 0, count)
	for idx, htmlLine := range htmlLines[:count] {
		resultLines = append(resultLines, ResultLine{
			Num:              lineNums[idx],
			FormattedContent: template.HTML(htmlLine),
		})
	}
	return resultLines
}
func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) { func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")
var formattedLinesBuffer bytes.Buffer var formattedLinesBuffer bytes.Buffer
contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n") contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lines := make([]ResultLine, 0, len(contentLines)) lineNums := make([]int, 0, len(contentLines))
index := startIndex index := startIndex
for i, line := range contentLines { for i, line := range contentLines {
var err error var err error
@ -91,29 +105,16 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
line[closeActiveIndex:], line[closeActiveIndex:],
) )
} else { } else {
err = writeStrings(&formattedLinesBuffer, err = writeStrings(&formattedLinesBuffer, line)
line,
)
} }
if err != nil { if err != nil {
return nil, err return nil, err
} }
lines = append(lines, ResultLine{Num: startLineNum + i}) lineNums = append(lineNums, startLineNum+i)
index += len(line) index += len(line)
} }
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
highlightedLines := strings.Split(string(hl), "\n")
// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
lines = lines[:min(len(highlightedLines), len(lines))]
highlightedLines = highlightedLines[:len(lines)]
for i := 0; i < len(lines); i++ {
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}
return &Result{ return &Result{
RepoID: result.RepoID, RepoID: result.RepoID,
Filename: result.Filename, Filename: result.Filename,
@ -121,7 +122,7 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
UpdatedUnix: result.UpdatedUnix, UpdatedUnix: result.UpdatedUnix,
Language: result.Language, Language: result.Language,
Color: result.Color, Color: result.Color,
Lines: lines, Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
}, nil }, nil
} }

@ -172,6 +172,7 @@ org_kind = Search orgs...
team_kind = Search teams... team_kind = Search teams...
code_kind = Search code... code_kind = Search code...
code_search_unavailable = Code search is currently not available. Please contact the site administrator. code_search_unavailable = Code search is currently not available. Please contact the site administrator.
code_search_by_git_grep = Current code search results are provided by "git grep". There might be better results if site administrator enables Repository Indexer.
package_kind = Search packages... package_kind = Search packages...
project_kind = Search projects... project_kind = Search projects...
branch_kind = Search branches... branch_kind = Search branches...

@ -5,9 +5,11 @@ package repo
import ( import (
"net/http" "net/http"
"strings"
"code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code" code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context" "code.gitea.io/gitea/services/context"
@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search"
// Search render repository search page // Search render repository search page
func Search(ctx *context.Context) { func Search(ctx *context.Context) {
if !setting.Indexer.RepoIndexerEnabled {
ctx.Redirect(ctx.Repo.RepoLink)
return
}
language := ctx.FormTrim("l") language := ctx.FormTrim("l")
keyword := ctx.FormTrim("q") keyword := ctx.FormTrim("q")
@ -42,26 +39,54 @@ func Search(ctx *context.Context) {
page = 1 page = 1
} }
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ var total int
RepoIDs: []int64{ctx.Repo.Repository.ID}, var searchResults []*code_indexer.Result
Keyword: keyword, var searchResultLanguages []*code_indexer.SearchResultLanguages
IsKeywordFuzzy: isFuzzy, if setting.Indexer.RepoIndexerEnabled {
Language: language, var err error
Paginator: &db.ListOptions{ total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
Page: page, RepoIDs: []int64{ctx.Repo.Repository.ID},
PageSize: setting.UI.RepoSearchPagingNum, Keyword: keyword,
}, IsKeywordFuzzy: isFuzzy,
}) Language: language,
if err != nil { Paginator: &db.ListOptions{
if code_indexer.IsAvailable(ctx) { Page: page,
ctx.ServerError("SearchResults", err) PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
} else {
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
if err != nil {
ctx.ServerError("GrepSearch", err)
return return
} }
ctx.Data["CodeIndexerUnavailable"] = true total = len(res)
} else { pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
res = res[pageStart:pageEnd]
for _, r := range res {
searchResults = append(searchResults, &code_indexer.Result{
RepoID: ctx.Repo.Repository.ID,
Filename: r.Filename,
CommitID: ctx.Repo.CommitID,
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
})
}
} }
ctx.Data["CodeIndexerEnabled"] = setting.Indexer.RepoIndexerEnabled
ctx.Data["Repo"] = ctx.Repo.Repository ctx.Data["Repo"] = ctx.Repo.Repository
ctx.Data["SearchResults"] = searchResults ctx.Data["SearchResults"] = searchResults
ctx.Data["SearchResultLanguages"] = searchResultLanguages ctx.Data["SearchResultLanguages"] = searchResultLanguages

@ -5,27 +5,18 @@
{{template "base/alert" .}} {{template "base/alert" .}}
{{template "repo/code/recently_pushed_new_branches" .}} {{template "repo/code/recently_pushed_new_branches" .}}
{{if and (not .HideRepoInfo) (not .IsBlame)}} {{if and (not .HideRepoInfo) (not .IsBlame)}}
<div class="ui repo-description gt-word-break"> <div class="repo-description">
<div id="repo-desc" class="tw-text-16"> <div id="repo-desc" class="gt-word-break tw-text-16">
{{$description := .Repository.DescriptionHTML $.Context}} {{$description := .Repository.DescriptionHTML $.Context}}
{{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}} {{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}}
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a> <a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
</div> </div>
{{if .RepoSearchEnabled}} <form class="ignore-dirty" action="{{.RepoLink}}/search" method="get">
<div class="ui repo-search"> <div class="ui small action input">
<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get"> <input name="q" value="{{.Keyword}}" placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
<div class="field"> {{template "shared/search/button"}}
<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "search.code_search_unavailable"}}"{{end}}>
<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
{{if .CodeIndexerUnavailable}}
<i class="icon">{{svg "octicon-alert"}}</i>
{{end}}
{{template "shared/search/button" dict "Disabled" .CodeIndexerUnavailable}}
</div>
</div>
</form>
</div> </div>
{{end}} </form>
</div> </div>
<div class="tw-flex tw-items-center tw-flex-wrap tw-gap-1" id="repo-topics"> <div class="tw-flex tw-items-center tw-flex-wrap tw-gap-1" id="repo-topics">
{{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}} {{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}}

@ -7,9 +7,16 @@
<div class="ui error message"> <div class="ui error message">
<p>{{ctx.Locale.Tr "search.code_search_unavailable"}}</p> <p>{{ctx.Locale.Tr "search.code_search_unavailable"}}</p>
</div> </div>
{{else if .SearchResults}} {{else}}
{{template "shared/search/code/results" .}} {{if not .CodeIndexerEnabled}}
{{else if .Keyword}} <div class="ui message">
<div>{{ctx.Locale.Tr "search.no_results"}}</div> <p>{{ctx.Locale.Tr "search.code_search_by_git_grep"}}</p>
</div>
{{end}}
{{if .SearchResults}}
{{template "shared/search/code/results" .}}
{{else if .Keyword}}
<div>{{ctx.Locale.Tr "search.no_results"}}</div>
{{end}}
{{end}} {{end}}
</div> </div>

@ -1,3 +1,4 @@
{{if or .result.Language (not .result.UpdatedUnix.IsZero)}}
<div class="ui bottom attached table segment tw-flex tw-items-center tw-justify-between"> <div class="ui bottom attached table segment tw-flex tw-items-center tw-justify-between">
<div class="tw-flex tw-items-center gt-ml-4"> <div class="tw-flex tw-items-center gt-ml-4">
{{if .result.Language}} {{if .result.Language}}
@ -10,3 +11,4 @@
{{end}} {{end}}
</div> </div>
</div> </div>
{{end}}