Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support repo code search without setting up an indexer #29998

Merged
merged 13 commits into from
Mar 24, 2024
117 changes: 117 additions & 0 deletions modules/git/grep.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"context"
"errors"
"fmt"
"io"
"os"
"strconv"
"strings"

"code.gitea.io/gitea/modules/util"
)

// GrepResult holds the lines that `git grep` reported for a single file.
type GrepResult struct {
	// Filename is the path taken from the part of a heading line after the
	// first ":" (the part before it is the ref).
	Filename string

	// LineNumbers and LineCodes are filled in lockstep by GrepSearch:
	// LineCodes[i] is the text reported for line LineNumbers[i].
	LineNumbers []int
	LineCodes   []string
}

// GrepOptions tunes a GrepSearch call. The zero value is usable.
type GrepOptions struct {
	// RefName is the ref handed to `git grep`; GrepSearch substitutes "HEAD"
	// when it is left empty.
	RefName string

	// ContextLineNumber is passed to `git grep` as the value of --context.
	ContextLineNumber int

	// IsFuzzy makes GrepSearch split the search string on whitespace and add
	// every word as its own -e pattern instead of using the string as a whole.
	IsFuzzy bool
}

func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to creata os pipe to grep: %w", err)
silverwind marked this conversation as resolved.
Show resolved Hide resolved
}
stderrReader, stderrWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to creata os pipe to grep: %w", err)
silverwind marked this conversation as resolved.
Show resolved Hide resolved
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
_ = stderrReader.Close()
_ = stderrWriter.Close()
}()

/*
The output is like this ( "^@" means \x00):

HEAD:.air.toml
6^@bin = "gitea"

HEAD:.changelog.yml
2^@repo: go-gitea/gitea
*/
var stderr []byte
var results []*GrepResult
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.IsFuzzy {
words := strings.Fields(search)
for _, word := range words {
cmd.AddOptionValues("-e", word)
}
} else {
cmd.AddOptionValues("-e", search)
}
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
err = cmd.Run(&RunOpts{
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: stderrWriter,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
_ = stderrWriter.Close()
defer stdoutReader.Close()
defer stderrReader.Close()

isInBlock := false
scanner := bufio.NewScanner(stdoutReader)
var res *GrepResult
for scanner.Scan() {
line := scanner.Text()
if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true
res = &GrepResult{Filename: filename}
results = append(results, res)
}
continue
}
if line == "" {
if len(results) >= 50 {
cancel()
break
}
isInBlock = false
continue
}
if line == "--" {
continue
}
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
res.LineCodes = append(res.LineCodes, lineCode)
}
}
stderr, _ = io.ReadAll(stderrReader)
return scanner.Err()
},
})
if err != nil && !errors.Is(err, context.Canceled) && len(stderr) != 0 {
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, string(stderr))
}
return results, nil
}
37 changes: 37 additions & 0 deletions modules/git/grep_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"context"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
)

func TestGrepSearch(t *testing.T) {
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
assert.NoError(t, err)
defer repo.Close()

res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
}, res)

res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err)
assert.Len(t, res, 0)
}
35 changes: 18 additions & 17 deletions modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil
}

// HighlightSearchResultCode highlights code as a single block and pairs each
// highlighted line with the entry of lineNums at the same index. The result
// has as many entries as the shorter of the two.
func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
	// Highlight the block as a whole: highlighting line by line does not work
	// well when a highlighted construct spans several lines.
	highlighted, _ := highlight.Code(filename, "", code)
	hlLines := strings.Split(string(highlighted), "\n")

	// "highlight" removes the last `\n`, so the number of highlighted lines
	// might not match len(lineNums); only pair up what both sides have.
	count := min(len(hlLines), len(lineNums))
	result := make([]ResultLine, count)
	for i := range result {
		result[i] = ResultLine{
			Num:              lineNums[i],
			FormattedContent: template.HTML(hlLines[i]),
		}
	}
	return result
}

func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")

var formattedLinesBuffer bytes.Buffer

contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lines := make([]ResultLine, 0, len(contentLines))
lineNums := make([]int, 0, len(contentLines))
index := startIndex
for i, line := range contentLines {
var err error
Expand All @@ -91,37 +105,24 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
line[closeActiveIndex:],
)
} else {
err = writeStrings(&formattedLinesBuffer,
line,
)
err = writeStrings(&formattedLinesBuffer, line)
}
if err != nil {
return nil, err
}

lines = append(lines, ResultLine{Num: startLineNum + i})
lineNums = append(lineNums, startLineNum+i)
index += len(line)
}

// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
highlightedLines := strings.Split(string(hl), "\n")

// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
lines = lines[:min(len(highlightedLines), len(lines))]
highlightedLines = highlightedLines[:len(lines)]
for i := 0; i < len(lines); i++ {
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}

return &Result{
RepoID: result.RepoID,
Filename: result.Filename,
CommitID: result.CommitID,
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
Lines: lines,
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
}, nil
}

Expand Down
66 changes: 45 additions & 21 deletions routers/web/repo/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ package repo

import (
"net/http"
"strings"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
Expand All @@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search"

// Search render repository search page
func Search(ctx *context.Context) {
if !setting.Indexer.RepoIndexerEnabled {
ctx.Redirect(ctx.Repo.RepoLink)
return
}

language := ctx.FormTrim("l")
keyword := ctx.FormTrim("q")

Expand All @@ -42,24 +39,51 @@ func Search(ctx *context.Context) {
page = 1
}

total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
var total int
var searchResults []*code_indexer.Result
var searchResultLanguages []*code_indexer.SearchResultLanguages
if setting.Indexer.RepoIndexerEnabled {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest integrating the git grep search as its own indexer and setting it as the default.

This way it is transparent for webUI or API what to do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea how to do that clearly, and I am not a fan of adding a lot of "options".

If you have better ideas, feel free to edit this PR directly or open follow-up PRs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a prompt like this, maybe it could make it clearer. What do you think?

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that will help but is unrelated to the architecture idea of mine.

I will try to create a pull request against your branch that restructures this according to my proposal, so it can be checked out and tested, etc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect that there is no new option to be introduced.

var err error
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
if err != nil {
ctx.ServerError("GrepSearch", err)
return
}
total = len(res)
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
res = res[pageStart:pageEnd]
for _, r := range res {
searchResults = append(searchResults, &code_indexer.Result{
RepoID: ctx.Repo.Repository.ID,
Filename: r.Filename,
CommitID: ctx.Repo.CommitID,
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
})
}
}

ctx.Data["Repo"] = ctx.Repo.Repository
Expand Down
23 changes: 7 additions & 16 deletions templates/repo/home.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,18 @@
{{template "base/alert" .}}
{{template "repo/code/recently_pushed_new_branches" .}}
{{if and (not .HideRepoInfo) (not .IsBlame)}}
<div class="ui repo-description gt-word-break">
<div id="repo-desc" class="gt-font-16">
<div class="repo-description">
<div id="repo-desc" class="gt-word-break gt-font-16">
{{$description := .Repository.DescriptionHTML $.Context}}
{{if $description}}<span class="description">{{$description | RenderCodeBlock}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{ctx.Locale.Tr "repo.no_desc"}}</span>{{end}}
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
</div>
{{if .RepoSearchEnabled}}
<div class="ui repo-search">
<form class="ui form ignore-dirty" action="{{.RepoLink}}/search" method="get">
<div class="field">
<div class="ui small action input{{if .CodeIndexerUnavailable}} disabled left icon{{end}}"{{if .CodeIndexerUnavailable}} data-tooltip-content="{{ctx.Locale.Tr "search.code_search_unavailable"}}"{{end}}>
<input name="q" value="{{.Keyword}}"{{if .CodeIndexerUnavailable}} disabled{{end}} placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
{{if .CodeIndexerUnavailable}}
<i class="icon">{{svg "octicon-alert"}}</i>
{{end}}
{{template "shared/search/button" dict "Disabled" .CodeIndexerUnavailable}}
</div>
</div>
</form>
<form class="ignore-dirty" action="{{.RepoLink}}/search" method="get">
<div class="ui small action input">
<input name="q" value="{{.Keyword}}" placeholder="{{ctx.Locale.Tr "search.code_kind"}}">
{{template "shared/search/button"}}
</div>
{{end}}
</form>
</div>
<div class="tw-flex tw-content-center tw-flex-wrap gt-gap-2" id="repo-topics">
{{range .Topics}}<a class="ui repo-topic large label topic gt-m-0" href="{{AppSubUrl}}/explore/repos?q={{.Name}}&topic=1">{{.Name}}</a>{{end}}
Expand Down
2 changes: 2 additions & 0 deletions templates/shared/searchbottom.tmpl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{{if or .result.Language (not .result.UpdatedUnix.IsZero)}}
<div class="ui bottom attached table segment tw-flex tw-content-center tw-justify-between">
<div class="tw-flex tw-content-center gt-ml-4">
{{if .result.Language}}
Expand All @@ -10,3 +11,4 @@
{{end}}
</div>
</div>
{{end}}
silverwind marked this conversation as resolved.
Show resolved Hide resolved