Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
wxiaoguang committed Dec 13, 2023
1 parent 7304785 commit 0a25514
Show file tree
Hide file tree
Showing 15 changed files with 81 additions and 124 deletions.
3 changes: 3 additions & 0 deletions custom/conf/app.example.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,9 @@ LEVEL = Info
;; Max size of files to be displayed (default is 8MiB)
;MAX_DISPLAY_FILE_SIZE = 8388608
;;
;; Detect ambiguous unicode characters in file contents and show warnings on the UI
;AMBIGUOUS_UNICODE_DETECTION = true
;;
;; Whether the email of the user should be shown in the Explore Users page
;SHOW_USER_EMAIL = true
;;
Expand Down
1 change: 1 addition & 0 deletions docs/content/administration/config-cheat-sheet.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ The following configuration set `Content-Type: application/vnd.android.package-a
- `THEMES`: **gitea-auto,gitea-light,gitea-dark**: All available themes. Allow users select personalized themes.
regardless of the value of `DEFAULT_THEME`.
- `MAX_DISPLAY_FILE_SIZE`: **8388608**: Max size of files to be displayed (default is 8MiB)
- `AMBIGUOUS_UNICODE_DETECTION`: **true**: Detect ambiguous unicode characters in file contents and show warnings on the UI
- `REACTIONS`: All available reactions users can choose on issues/prs and comments
Values can be emoji alias (:smile:) or a unicode emoji.
For custom reactions, add a tightly cropped square image to public/assets/img/emoji/reaction_name.png
Expand Down
59 changes: 10 additions & 49 deletions modules/charset/escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,31 @@
package charset

import (
"bufio"
"html/template"
"io"
"strings"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/translation"
)

// RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0

// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
func EscapeControlHTML(html template.HTML, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output template.HTML) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

if err := StreamHTML(strings.NewReader(text), streamer); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, sb.String()
escaped, _ = EscapeControlReader(strings.NewReader(string(html)), sb, locale, allowed...) // err has been handled in EscapeControlReader
return escaped, template.HTML(sb.String())
}

// EscapeControlReaders escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte
// EscapeControlReader escapes the unicode control sequences in a provided reader of HTML content and writer in a locale and returns the findings as an EscapeStatus
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
if !setting.UI.AmbiguousUnicodeDetection {
_, err = io.Copy(writer, reader)
return &EscapeStatus{}, err
}
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

Expand All @@ -43,41 +42,3 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.
}
return streamer.escaped, err
}

// EscapeControlStringReader escapes the unicode control sequences in a provided reader of string content and writer in a locale and returns the findings as an EscapeStatus and the escaped []byte. HTML line breaks are not inserted after every newline by this method.
func EscapeControlStringReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
bufRd := bufio.NewReader(reader)
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

for {
line, rdErr := bufRd.ReadString('\n')
if len(line) > 0 {
if err := streamer.Text(line); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
return streamer.escaped, err
}
}
if rdErr != nil {
if rdErr != io.EOF {
err = rdErr
}
break
}
}
return streamer.escaped, err
}

// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

if err := streamer.Text(text); err != nil {
streamer.escaped.HasError = true
log.Error("Error whilst escaping: %v", err)
}
return streamer.escaped, sb.String()
}
2 changes: 1 addition & 1 deletion modules/charset/escape_stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func (e *escapeStreamer) Text(data string) error {
until, next = nextIdxs[0]+pos, nextIdxs[1]+pos
}

// from pos until until we know that the runes are not \r\t\n or even ' '
// from pos until we know that the runes are not \r\t\n or even ' '
runes := make([]rune, 0, next-until)
positions := make([]int, 0, next-until+1)

Expand Down
23 changes: 0 additions & 23 deletions modules/charset/escape_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,20 +132,6 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
},
}

func TestEscapeControlString(t *testing.T) {
for _, tt := range escapeControlTests {
t.Run(tt.name, func(t *testing.T) {
status, result := EscapeControlString(tt.text, &translation.MockLocale{})
if !reflect.DeepEqual(*status, tt.status) {
t.Errorf("EscapeControlString() status = %v, wanted= %v", status, tt.status)
}
if result != tt.result {
t.Errorf("EscapeControlString()\nresult= %v,\nwanted= %v", result, tt.result)
}
})
}
}

func TestEscapeControlReader(t *testing.T) {
// lets add some control characters to the tests
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
Expand Down Expand Up @@ -186,12 +172,3 @@ func TestEscapeControlReader(t *testing.T) {
})
}
}

func TestEscapeControlReader_panic(t *testing.T) {
bs := make([]byte, 0, 20479)
bs = append(bs, 'A')
for i := 0; i < 6826; i++ {
bs = append(bs, []byte("—")...)
}
_, _ = EscapeControlString(string(bs), &translation.MockLocale{})
}
29 changes: 14 additions & 15 deletions modules/highlight/highlight.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"bytes"
"fmt"
gohtml "html"
"html/template"
"io"
"path/filepath"
"strings"
Expand Down Expand Up @@ -55,7 +56,7 @@ func NewContext() {
}

// Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
func Code(fileName, language, code string) (string, string) {
func Code(fileName, language, code string) (output template.HTML, lexerName string) {
NewContext()

// diff view newline will be passed as empty, change to literal '\n' so it can be copied
Expand All @@ -65,7 +66,7 @@ func Code(fileName, language, code string) (string, string) {
}

if len(code) > sizeLimit {
return code, ""
return template.HTML(template.HTMLEscapeString(code)), ""
}

var lexer chroma.Lexer
Expand Down Expand Up @@ -102,13 +103,11 @@ func Code(fileName, language, code string) (string, string) {
cache.Add(fileName, lexer)
}

lexerName := formatLexerName(lexer.Config().Name)

return CodeFromLexer(lexer, code), lexerName
return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
}

// CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
func CodeFromLexer(lexer chroma.Lexer, code string) string {
func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
Expand All @@ -120,23 +119,23 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
iterator, err := lexer.Tokenise(nil, code)
if err != nil {
log.Error("Can't tokenize code: %v", err)
return code
return template.HTML(template.HTMLEscapeString(code))
}
// style not used for live site but need to pass something
err = formatter.Format(htmlw, githubStyles, iterator)
if err != nil {
log.Error("Can't format code: %v", err)
return code
return template.HTML(template.HTMLEscapeString(code))
}

_ = htmlw.Flush()
// Chroma will add newlines for certain lexers in order to highlight them properly
// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
return strings.TrimSuffix(htmlbuf.String(), "\n")
return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
}

// File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
func File(fileName, language string, code []byte) ([]string, string, error) {
func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
NewContext()

if len(code) > sizeLimit {
Expand Down Expand Up @@ -183,24 +182,24 @@ func File(fileName, language string, code []byte) ([]string, string, error) {
tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
htmlBuf := &bytes.Buffer{}

lines := make([]string, 0, len(tokensLines))
lines := make([]template.HTML, 0, len(tokensLines))
for _, tokens := range tokensLines {
iterator = chroma.Literator(tokens...)
err = formatter.Format(htmlBuf, githubStyles, iterator)
if err != nil {
return nil, "", fmt.Errorf("can't format code: %w", err)
}
lines = append(lines, htmlBuf.String())
lines = append(lines, template.HTML(htmlBuf.String()))
htmlBuf.Reset()
}

return lines, lexerName, nil
}

// PlainText returns non-highlighted HTML for code
func PlainText(code []byte) []string {
func PlainText(code []byte) []template.HTML {
r := bufio.NewReader(bytes.NewReader(code))
m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
for {
content, err := r.ReadString('\n')
if err != nil && err != io.EOF {
Expand All @@ -210,7 +209,7 @@ func PlainText(code []byte) []string {
if content == "" && err == io.EOF {
break
}
s := gohtml.EscapeString(content)
s := template.HTML(gohtml.EscapeString(content))
m = append(m, s)
}
return m
Expand Down
32 changes: 21 additions & 11 deletions modules/highlight/highlight_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,36 @@
package highlight

import (
"html/template"
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

func lines(s string) []string {
return strings.Split(strings.ReplaceAll(strings.TrimSpace(s), `\n`, "\n"), "\n")
func lines(s string) (out []template.HTML) {
// "" => [], "a" => ["a"], "a\n" => ["a\n"], "a\nb" => ["a\n", "b"] (each line always includes EOL "\n" if it exists)
out = make([]template.HTML, 0)
s = strings.ReplaceAll(strings.ReplaceAll(strings.TrimSpace(s), "\n", ""), `\n`, "\n")
for {
if p := strings.IndexByte(s, '\n'); p != -1 {
out = append(out, template.HTML(s[:p+1]))
s = s[p+1:]
} else {
break
}
}
if s != "" {
out = append(out, template.HTML(s))
}
return out
}

func TestFile(t *testing.T) {
tests := []struct {
name string
code string
want []string
want []template.HTML
lexerName string
}{
{
Expand Down Expand Up @@ -99,10 +114,7 @@ c=2
t.Run(tt.name, func(t *testing.T) {
out, lexerName, err := File(tt.name, "", []byte(tt.code))
assert.NoError(t, err)
expected := strings.Join(tt.want, "\n")
actual := strings.Join(out, "\n")
assert.Equal(t, strings.Count(actual, "<span"), strings.Count(actual, "</span>"))
assert.EqualValues(t, expected, actual)
assert.EqualValues(t, tt.want, out)
assert.Equal(t, tt.lexerName, lexerName)
})
}
Expand All @@ -112,7 +124,7 @@ func TestPlainText(t *testing.T) {
tests := []struct {
name string
code string
want []string
want []template.HTML
}{
{
name: "empty.py",
Expand Down Expand Up @@ -165,9 +177,7 @@ c=2`),
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
out := PlainText([]byte(tt.code))
expected := strings.Join(tt.want, "\n")
actual := strings.Join(out, "\n")
assert.EqualValues(t, expected, actual)
assert.EqualValues(t, tt.want, out)
})
}
}
3 changes: 2 additions & 1 deletion modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package code
import (
"bytes"
"context"
"html/template"
"strings"

"code.gitea.io/gitea/modules/highlight"
Expand All @@ -22,7 +23,7 @@ type Result struct {
Language string
Color string
LineNumbers []int
FormattedLines string
FormattedLines template.HTML
}

type SearchResultLanguages = internal.SearchResultLanguages
Expand Down
2 changes: 1 addition & 1 deletion modules/markup/orgmode/orgmode.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
}
lexer = chroma.Coalesce(lexer)

if _, err := w.WriteString(highlight.CodeFromLexer(lexer, source)); err != nil {
if _, err := w.WriteString(string(highlight.CodeFromLexer(lexer, source))); err != nil {
return ""
}
}
Expand Down
5 changes: 5 additions & 0 deletions modules/setting/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ var UI = struct {
OnlyShowRelevantRepos bool
ExploreDefaultSort string `ini:"EXPLORE_PAGING_DEFAULT_SORT"`

AmbiguousUnicodeDetection bool

Notification struct {
MinTimeout time.Duration
TimeoutStep time.Duration
Expand Down Expand Up @@ -82,6 +84,9 @@ var UI = struct {
Reactions: []string{`+1`, `-1`, `laugh`, `hooray`, `confused`, `heart`, `rocket`, `eyes`},
CustomEmojis: []string{`git`, `gitea`, `codeberg`, `gitlab`, `github`, `gogs`},
CustomEmojisMap: map[string]string{"git": ":git:", "gitea": ":gitea:", "codeberg": ":codeberg:", "gitlab": ":gitlab:", "github": ":github:", "gogs": ":gogs:"},

AmbiguousUnicodeDetection: true,

Notification: struct {
MinTimeout time.Duration
TimeoutStep time.Duration
Expand Down
3 changes: 1 addition & 2 deletions routers/web/repo/blame.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
lexerName = lexerNameForLine
}

br.EscapeStatus, line = charset.EscapeControlHTML(line, ctx.Locale)
br.Code = gotemplate.HTML(line)
br.EscapeStatus, br.Code = charset.EscapeControlHTML(line, ctx.Locale)
rows = append(rows, br)
escapeStatus = escapeStatus.Or(br.EscapeStatus)
}
Expand Down
Loading

0 comments on commit 0a25514

Please sign in to comment.