Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serve LFS/attachment with http.ServeContent to Support Range-Request #18448

Closed
wants to merge 13 commits into from
15 changes: 9 additions & 6 deletions integrations/download_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package integrations

import (
"mime"
"net/http"
"testing"

Expand Down Expand Up @@ -70,24 +71,26 @@ func TestDownloadRawTextFileWithoutMimeTypeMapping(t *testing.T) {

session := loginUser(t, "user2")

req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/test.xml")
req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo")
resp := session.MakeRequest(t, req, http.StatusOK)

assert.Equal(t, "text/plain; charset=utf-8", resp.HeaderMap.Get("Content-Type"))
assert.Equal(t, "application/octet-stream", resp.HeaderMap.Get("Content-Type"))
}

func TestDownloadRawTextFileWithMimeTypeMapping(t *testing.T) {
defer prepareTestEnv(t)()
setting.MimeTypeMap.Map[".xml"] = "text/xml"

setting.MimeTypeMap.Map[".foo"] = "audio/foo"
setting.MimeTypeMap.Enabled = true
_ = mime.AddExtensionType(".foo", "audio/foo")

session := loginUser(t, "user2")

req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/test.xml")
req := NewRequest(t, "GET", "/user2/repo2/raw/branch/master/bin.foo")
resp := session.MakeRequest(t, req, http.StatusOK)

assert.Equal(t, "text/xml; charset=utf-8", resp.HeaderMap.Get("Content-Type"))
assert.Equal(t, "audio/foo", resp.HeaderMap.Get("Content-Type"))

delete(setting.MimeTypeMap.Map, ".xml")
delete(setting.MimeTypeMap.Map, ".foo")
setting.MimeTypeMap.Enabled = false
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1032bbf17fbc0d9c95bb5418dabe8f8c99278700
808eedf6b8dd519aa89b59af2d815ed668580fc2
12 changes: 12 additions & 0 deletions modules/charset/charset.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,18 @@ func RemoveBOMIfPresent(content []byte) []byte {
return content
}

// DetectEncodingFromReader
// Read the head 1024 bytes from the reader and detect it's encoding
// Note: you may need reader.Seek(0, io.SeekStart) to reset the offset
func DetectEncodingFromReader(reader io.Reader) (string, error) {
buf := make([]byte, 1024)
n, err := util.ReadAtMost(reader, buf)
if err != nil {
return "", fmt.Errorf("DetectEncoding io error: %w", err)
}
return DetectEncoding(buf[:n])
}

// DetectEncoding detect the encoding of content
func DetectEncoding(content []byte) (string, error) {
if utf8.Valid(content) {
Expand Down
2 changes: 1 addition & 1 deletion modules/lfs/content_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ func (s *ContentStore) Verify(pointer Pointer) (bool, error) {
}

// ReadMetaObject will read a models.LFSMetaObject and return a reader
func ReadMetaObject(pointer Pointer) (io.ReadCloser, error) {
func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) {
contentStore := NewContentStore()
return contentStore.Get(pointer)
}
Expand Down
11 changes: 10 additions & 1 deletion modules/setting/mime_type_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

package setting

import "strings"
import (
"mime"
"strings"

"code.gitea.io/gitea/modules/log"
)

// MimeTypeMap defines custom mime type mapping settings
var MimeTypeMap = struct {
Expand All @@ -21,6 +26,10 @@ func newMimeTypeMap() {
m := make(map[string]string, len(keys))
for _, key := range keys {
m[strings.ToLower(key.Name())] = key.Value()
err := mime.AddExtensionType(key.Name(), key.Value())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please dont add unrelated changes in here

if err != nil {
log.Warn("mime.AddExtensionType(%s,%s): %v", key.Name(), key.Value(), err)
}
}
MimeTypeMap.Map = m
if len(keys) > 0 {
Expand Down
42 changes: 34 additions & 8 deletions modules/typesniffer/typesniffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package typesniffer
import (
"fmt"
"io"
"mime"
"net/http"
"path/filepath"
"regexp"
"strings"

Expand All @@ -32,32 +34,32 @@ type SniffedType struct {

// IsText etects if content format is plain text.
func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
return strings.HasPrefix(ct.contentType, "text/")
}

// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
return strings.HasPrefix(ct.contentType, "image/")
}

// IsSvgImage detects if data is an SVG image format
func (ct SniffedType) IsSvgImage() bool {
return strings.Contains(ct.contentType, SvgMimeType)
return strings.HasPrefix(ct.contentType, SvgMimeType)
}

// IsPDF detects if data is a PDF format
func (ct SniffedType) IsPDF() bool {
return strings.Contains(ct.contentType, "application/pdf")
return strings.HasPrefix(ct.contentType, "application/pdf")
}

// IsVideo detects if data is an video format
func (ct SniffedType) IsVideo() bool {
return strings.Contains(ct.contentType, "video/")
return strings.HasPrefix(ct.contentType, "video/")
}

// IsAudio detects if data is an video format
func (ct SniffedType) IsAudio() bool {
return strings.Contains(ct.contentType, "audio/")
return strings.HasPrefix(ct.contentType, "audio/")
}

// IsRepresentableAsText returns true if file content can be represented as
Expand All @@ -66,6 +68,11 @@ func (ct SniffedType) IsRepresentableAsText() bool {
return ct.IsText() || ct.IsSvgImage()
}

// Mime return the mime
func (ct SniffedType) Mime() string {
return strings.Split(ct.contentType, ";")[0]
}

// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
func DetectContentType(data []byte) SniffedType {
if len(data) == 0 {
Expand All @@ -78,15 +85,34 @@ func DetectContentType(data []byte) SniffedType {
data = data[:sniffLen]
}

if (strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
if (strings.HasPrefix(ct, "text/plain") || strings.HasPrefix(ct, "text/html")) && svgTagRegex.Match(data) ||
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not keep Contains?

strings.HasPrefix(ct, "text/xml") && svgTagInXMLRegex.Match(data) {
// SVG is unsupported. https://github.com/golang/go/issues/15888
ct = SvgMimeType
}

return SniffedType{ct}
}

// DetectContentTypeExtFirst
// detect content type by `name` first, if not found, detect by `reader`
// Note: you may need `reader.Seek(0, io.SeekStart)` to reset the offset
func DetectContentTypeExtFirst(name string, bytesOrReader interface{}) (SniffedType, error) {
ct := mime.TypeByExtension(filepath.Ext(name))
// FIXME: Not sure if `!strings.HasPrefix(ct, "text/")` is necessary to keep the old behavior.
if ct != "" && !strings.HasPrefix(ct, "text/") {
return SniffedType{ct}, nil
}
if r, ok := bytesOrReader.(io.Reader); ok {
st, err := DetectContentTypeFromReader(r)
if nil != err {
return SniffedType{}, err
}
return st, nil
}
return DetectContentType(bytesOrReader.([]byte)), nil
}

// DetectContentTypeFromReader guesses the content type contained in the reader.
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
buf := make([]byte, sniffLen)
Expand Down
126 changes: 76 additions & 50 deletions routers/common/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ package common
import (
"fmt"
"io"
"path"
"path/filepath"
"net/http"
"strconv"
"strings"
"time"

"code.gitea.io/gitea/modules/charset"
"code.gitea.io/gitea/modules/context"
Expand All @@ -21,7 +22,63 @@ import (
"code.gitea.io/gitea/modules/util"
)

// ServeBlob download a git.Blob
func setCommonHeaders(ctx *context.Context, name string, data interface{}) error {
// Google Chrome dislike commas in filenames, so let's change it to a space
name = strings.ReplaceAll(name, ",", " ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto from the other PR:

I don't think this is needed actually, from what I remember about this weird bug, if you quote the filename(as being done in this function) chromium won't error about it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's strange. Is it dependent on different chromium versions?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a historical problem, I believe modern browsers (and modern frameworks) can handle it correctly, and we do not need hacky methods.

https://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http


// Fixme: ctx.Repo.Repository.IsPrivate is nil
// if ctx.Repo.Repository.IsPrivate {
// ctx.Resp.Header().Set("Cache-Control", "private, max-age=300")
// } else {
// ctx.Resp.Header().Set("Cache-Control", "public, max-age=86400")
// }
ctx.Resp.Header().Set("Cache-Control", "public, max-age=300")

st, err := typesniffer.DetectContentTypeExtFirst(name, data)
if nil != err {
return err
}

// reset the offset to the start of served file
if seeker, ok := data.(io.ReadSeeker); ok {
_, _ = seeker.Seek(0, io.SeekStart)
}

if st.IsText() || ctx.FormBool("render") {
var cs string
var err error
if reader, ok := data.(io.ReadSeeker); ok {
cs, err = charset.DetectEncodingFromReader(reader)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we need a seeker reader explicit for http.ServeContent - else change the detection to read in a buffer and use multi reader would also be a good option

_, _ = reader.Seek(0, io.SeekStart)
} else {
cs, err = charset.DetectEncoding(data.([]byte))
}
if err != nil {
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err)
cs = "utf-8"
}

// http.ServeContent has bug on detecting GBK charset
ctx.Resp.Header().Set("Content-Type", fmt.Sprintf("%s; charset=%s", st.Mime(), strings.ToLower(cs)))
} else if (st.IsImage() || st.IsPDF()) && (setting.UI.SVG.Enabled || !st.IsSvgImage()) {
ctx.Resp.Header().Set("Content-Type", st.Mime())
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, name))
if st.IsSvgImage() {
ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox")
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
ctx.Resp.Header().Set("Content-Type", typesniffer.SvgMimeType)
}
} else {
ctx.Resp.Header().Set("Content-Type", st.Mime())
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, name))
}
return nil
}

// ServeBlob serve git.Blob which represents a normal(non-lfs) file stored in repositories
// todo: implement io.Seeker for git.Blob.blobReader to support Range-Request
func ServeBlob(ctx *context.Context, blob *git.Blob) error {
if httpcache.HandleGenericETagCache(ctx.Req, ctx.Resp, `"`+blob.ID.String()+`"`) {
return nil
Expand All @@ -37,70 +94,39 @@ func ServeBlob(ctx *context.Context, blob *git.Blob) error {
}
}()

return ServeData(ctx, ctx.Repo.TreePath, blob.Size(), dataRc)
}

// ServeData download file from io.Reader
func ServeData(ctx *context.Context, name string, size int64, reader io.Reader) error {
buf := make([]byte, 1024)
n, err := util.ReadAtMost(reader, buf)
n, err := util.ReadAtMost(dataRc, buf)
if err != nil {
return err
}
if n >= 0 {
buf = buf[:n]
}

ctx.Resp.Header().Set("Cache-Control", "public,max-age=86400")

size := blob.Size()
if size >= 0 {
ctx.Resp.Header().Set("Content-Length", fmt.Sprintf("%d", size))
ctx.Resp.Header().Set("Content-Length", strconv.FormatInt(size, 10))
} else {
log.Error("ServeData called to serve data: %s with size < 0: %d", name, size)
log.Error("ServeData called to serve data: %s with size < 0: %d", ctx.Repo.TreePath, size)
}
name = path.Base(name)

// Google Chrome dislike commas in filenames, so let's change it to a space
name = strings.ReplaceAll(name, ",", " ")

st := typesniffer.DetectContentType(buf)

mappedMimeType := ""
if setting.MimeTypeMap.Enabled {
fileExtension := strings.ToLower(filepath.Ext(name))
mappedMimeType = setting.MimeTypeMap.Map[fileExtension]
}
if st.IsText() || ctx.FormBool("render") {
cs, err := charset.DetectEncoding(buf)
if err != nil {
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", name, err)
cs = "utf-8"
}
if mappedMimeType == "" {
mappedMimeType = "text/plain"
}
ctx.Resp.Header().Set("Content-Type", mappedMimeType+"; charset="+strings.ToLower(cs))
} else {
ctx.Resp.Header().Set("Access-Control-Expose-Headers", "Content-Disposition")
if mappedMimeType != "" {
ctx.Resp.Header().Set("Content-Type", mappedMimeType)
}
if (st.IsImage() || st.IsPDF()) && (setting.UI.SVG.Enabled || !st.IsSvgImage()) {
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`inline; filename="%s"`, name))
if st.IsSvgImage() {
ctx.Resp.Header().Set("Content-Security-Policy", "default-src 'none'; style-src 'unsafe-inline'; sandbox")
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
ctx.Resp.Header().Set("Content-Type", typesniffer.SvgMimeType)
}
} else {
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf(`attachment; filename="%s"`, name))
}
if err := setCommonHeaders(ctx, ctx.Repo.TreePath, buf); err != nil {
return err
}

_, err = ctx.Resp.Write(buf)
if err != nil {
return err
}
_, err = io.Copy(ctx.Resp, reader)
_, err = io.Copy(ctx.Resp, dataRc)
return err
}

// ServeLargeFile Serve files stored with Git LFS and attachments uploaded on the Releases page
func ServeLargeFile(ctx *context.Context, name string, time time.Time, reader io.ReadSeeker) error {
if err := setCommonHeaders(ctx, name, reader); err != nil {
return err
}
http.ServeContent(ctx.Resp, ctx.Req, name, time, reader)
return nil
}
2 changes: 1 addition & 1 deletion routers/web/repo/attachment.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func GetAttachment(ctx *context.Context) {
}
defer fr.Close()

if err = common.ServeData(ctx, attach.Name, attach.Size, fr); err != nil {
if err = common.ServeLargeFile(ctx, attach.Name, attach.CreatedUnix.AsTime(), fr); err != nil {
ctx.ServerError("ServeData", err)
return
}
Expand Down
2 changes: 1 addition & 1 deletion routers/web/repo/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func ServeBlobOrLFS(ctx *context.Context, blob *git.Blob) error {
log.Error("ServeBlobOrLFS: Close: %v", err)
}
}()
return common.ServeData(ctx, ctx.Repo.TreePath, meta.Size, lfsDataRc)
return common.ServeLargeFile(ctx, ctx.Repo.TreePath, meta.CreatedUnix.AsTime(), lfsDataRc)
}
if err = dataRc.Close(); err != nil {
log.Error("ServeBlobOrLFS: Close: %v", err)
Expand Down