Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .gitattribute assisted language detection to blame, diff and render #17590

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/content/doc/advanced/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,14 @@ Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[
To apply a sanitisation rule only to a specific external renderer, the rule must use the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer.

## Highlight Mappings (`highlight.mapping`)

- `file_extension e.g. .toml`: **language e.g. ini**. File extension to language mapping overrides.

- Gitea will highlight files using the `linguist-language` or `gitlab-language` attribute from the `.gitattributes` file
if available. If this is not set or the language is unavailable, the file extension will be looked up
in this mapping, or the file type will be detected using heuristics.

## Time (`time`)

- `FORMAT`: Time format to display on UI. i.e. RFC1123 or 2006-01-02 15:04:05
Expand Down
17 changes: 16 additions & 1 deletion modules/git/repo_attribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
AllAttributes bool
Attributes []string
Filenames []string
IndexFile string
WorkTree string
}

// CheckAttribute runs git check-attr and returns the attributes reported for the given files
Expand All @@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return nil, fmt.Errorf("git version missing: %v", err)
}

env := []string{}

if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
}
if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
}

if len(env) > 0 {
env = append(os.Environ(), env...)
}

stdOut := new(bytes.Buffer)
stdErr := new(bytes.Buffer)

Expand Down Expand Up @@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[

cmd := NewCommand(cmdArgs...)

if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}

Expand Down
12 changes: 7 additions & 5 deletions modules/git/repo_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"bytes"
"context"
"os"
"path/filepath"
"strings"

"code.gitea.io/gitea/modules/log"
Expand Down Expand Up @@ -45,22 +46,23 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
}

// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
tmpIndex, err := os.CreateTemp("", "index")
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
tmpDir, err = os.MkdirTemp("", "index")
if err != nil {
return
}
filename = tmpIndex.Name()

filename = filepath.Join(tmpDir, ".tmp-index")
cancel = func() {
err := util.Remove(filename)
err := util.RemoveAll(tmpDir)
if err != nil {
log.Error("failed to remove tmp index file: %v", err)
}
}
err = repo.ReadTreeToIndex(treeish, filename)
if err != nil {
defer cancel()
return "", func() {}, err
return "", "", func() {}, err
}
return
}
Expand Down
55 changes: 28 additions & 27 deletions modules/git/repo_language_stats_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,9 @@ import (
"bytes"
"context"
"io"
"os"

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"

"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
Expand Down Expand Up @@ -48,35 +46,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
var checker *CheckAttributeReader

if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
if err == nil {
defer func() {
_ = util.RemoveAll(tmpWorkTree)
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: workTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()

checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: tmpWorkTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
defer cancel()
}
}

Expand Down Expand Up @@ -113,6 +104,16 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

sizes[language] += f.Size

return nil
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
zeripath marked this conversation as resolved.
Show resolved Hide resolved
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

sizes[language] += f.Size

return nil
}
}
Expand Down
55 changes: 28 additions & 27 deletions modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@ import (
"context"
"io"
"math"
"os"

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"

"github.com/go-enry/go-enry/v2"
)
Expand Down Expand Up @@ -68,35 +66,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
var checker *CheckAttributeReader

if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
if err == nil {
defer func() {
_ = util.RemoveAll(tmpWorkTree)
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()

checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: tmpWorkTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
defer cancel()
}
}

Expand Down Expand Up @@ -136,9 +127,19 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
language = group
}

sizes[language] += f.Size()
continue
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

sizes[language] += f.Size()
continue
}

}
}

Expand Down
29 changes: 22 additions & 7 deletions modules/highlight/highlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func NewContext() {
}

// Code returns a HTML version of code string with chroma syntax highlighting classes
func Code(fileName, code string) string {
func Code(fileName, language, code string) string {
NewContext()

// diff view newline will be passed as empty, change to literal \n so it can be copied
Expand All @@ -68,9 +68,16 @@ func Code(fileName, code string) string {
}

var lexer chroma.Lexer
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
//use mapped value to find lexer
lexer = lexers.Get(val)

if len(language) > 0 {
lexer = lexers.Get(language)
}

if lexer == nil {
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
//use mapped value to find lexer
lexer = lexers.Get(val)
}
}

if lexer == nil {
Expand Down Expand Up @@ -118,7 +125,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
}

// File returns a slice of chroma syntax highlighted lines of code
func File(numLines int, fileName string, code []byte) []string {
func File(numLines int, fileName, language string, code []byte) []string {
NewContext()

if len(code) > sizeLimit {
Expand All @@ -138,8 +145,16 @@ func File(numLines int, fileName string, code []byte) []string {
htmlw := bufio.NewWriter(&htmlbuf)

var lexer chroma.Lexer
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
lexer = lexers.Get(val)

// provided language overrides everything
if len(language) > 0 {
lexer = lexers.Get(language)
}

if lexer == nil {
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
lexer = lexers.Get(val)
}
}

if lexer == nil {
Expand Down
2 changes: 1 addition & 1 deletion modules/highlight/highlight_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ steps:

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := File(tt.numLines, tt.fileName, []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
if got := File(tt.numLines, tt.fileName, "", []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
t.Errorf("File() = %v, want %v", got, tt.want)
}
})
Expand Down
2 changes: 1 addition & 1 deletion modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
Language: result.Language,
Color: result.Color,
LineNumbers: lineNumbers,
FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()),
FormattedLines: highlight.Code(result.Filename, "", formattedLinesBuffer.String()),
}, nil
}

Expand Down
13 changes: 11 additions & 2 deletions modules/repofiles/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/services/gitdiff"

Expand Down Expand Up @@ -118,13 +119,21 @@ func TestGetDiffPreview(t *testing.T) {
t.Run("with given branch", func(t *testing.T) {
diff, err := GetDiffPreview(ctx.Repo.Repository, branch, treePath, content)
assert.NoError(t, err)
assert.EqualValues(t, expectedDiff, diff)
expectedBs, err := json.Marshal(expectedDiff)
assert.NoError(t, err)
bs, err := json.Marshal(diff)
assert.NoError(t, err)
assert.EqualValues(t, expectedBs, bs)
})

t.Run("empty branch, same results", func(t *testing.T) {
diff, err := GetDiffPreview(ctx.Repo.Repository, "", treePath, content)
assert.NoError(t, err)
assert.EqualValues(t, expectedDiff, diff)
expectedBs, err := json.Marshal(expectedDiff)
assert.NoError(t, err)
bs, err := json.Marshal(diff)
assert.NoError(t, err)
assert.EqualValues(t, expectedBs, bs)
})
}

Expand Down
Loading