Skip to content

Commit

Permalink
Make content detection ignore write sizes.
Browse files Browse the repository at this point in the history
Copy less to buffer on big writes.
  • Loading branch information
klauspost committed Jun 3, 2021
1 parent f51c216 commit b7e9e8e
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 7 deletions.
33 changes: 27 additions & 6 deletions gzhttp/gzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ const (
// DefaultMinSize is the default minimum size until we enable gzip compression.
// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
// If you take a file that is 1300 bytes and compress it to 800 bytes, it’s still transmitted in that same 1500 byte packet regardless, so you’ve gained nothing.
// That being the case, you should restrict the gzip compression to files with a size greater than a single packet, 1400 bytes (1.4KB) is a safe value.
DefaultMinSize = 1400
// That being the case, you should restrict the gzip compression to files with a size (plus header) greater than a single packet,
// 1024 bytes (1KB) is therefore the default.
DefaultMinSize = 1024
)

// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
Expand Down Expand Up @@ -81,9 +82,18 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
return w.ResponseWriter.Write(b)
}

// Save the write into a buffer for later use in GZIP responseWriter (if content is long enough) or at close with regular responseWriter.
// On the first write, w.buf changes from nil to a valid slice
w.buf = append(w.buf, b...)
// Save the write into a buffer for later use in GZIP responseWriter
// (if content is long enough) or at close with regular responseWriter.
wantBuf := 512
if w.minSize > wantBuf {
wantBuf = w.minSize
}
toAdd := len(b)
if len(w.buf)+toAdd > wantBuf {
toAdd = wantBuf - len(w.buf)
}
w.buf = append(w.buf, b[:toAdd]...)
remain := b[toAdd:]

var (
cl, _ = atoi(w.Header().Get(contentLength))
Expand Down Expand Up @@ -117,6 +127,11 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
if err := w.startGzip(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.gw.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
}
Expand All @@ -125,6 +140,11 @@ func (w *GzipResponseWriter) Write(b []byte) (int, error) {
if err := w.startPlain(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.ResponseWriter.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}

Expand Down Expand Up @@ -164,6 +184,7 @@ func (w *GzipResponseWriter) startGzip() error {
if err == nil && n < len(w.buf) {
err = io.ErrShortWrite
}
w.buf = w.buf[:0]
return err
}
return nil
Expand All @@ -189,7 +210,7 @@ func (w *GzipResponseWriter) startPlain() error {
err = io.ErrShortWrite
}

w.buf = nil
w.buf = w.buf[:0]
return err
}

Expand Down
100 changes: 99 additions & 1 deletion gzhttp/gzip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,9 @@ func TestNewGzipLevelHandler(t *testing.T) {
assertEqual(t, "gzip", res.Header.Get("Content-Encoding"))
assertEqual(t, "Accept-Encoding", res.Header.Get("Vary"))
got := gzipStrLevel(testBody, lvl)
assertEqual(t, got, resp.Body.Bytes())
if lvl != gzip.StatelessCompression {
assertEqual(t, got, resp.Body.Bytes())
}
t.Log(lvl, len(got))
})
}
Expand Down Expand Up @@ -761,6 +763,102 @@ func TestDefaultContentTypes(t *testing.T) {
}
}

// sniffTests is the table driving TestContentTypeDetect. Each entry pairs a
// short payload prefix with the Content-Type header the response is expected
// to carry once that prefix (followed by the shared test body) has been
// written through the gzip wrapper.
var sniffTests = []struct {
// desc is used as the subtest name.
desc string
// data is the leading bytes of the response; the test writes them one
// byte per Write call.
data []byte
// contentType is the exact Content-Type header value expected on the response.
contentType string
}{
// Some nonsense.
{"Empty", []byte{}, "text/plain; charset=utf-8"},
{"Binary", []byte{1, 2, 3}, "application/octet-stream"},

{"HTML document #1", []byte(`<HtMl><bOdY>blah blah blah</body></html>`), "text/html; charset=utf-8"},
{"HTML document #2", []byte(`<HTML></HTML>`), "text/html; charset=utf-8"},
{"HTML document #3 (leading whitespace)", []byte(` <!DOCTYPE HTML>...`), "text/html; charset=utf-8"},
{"HTML document #4 (leading CRLF)", []byte("\r\n<html>..."), "text/html; charset=utf-8"},

{"Plain text", []byte(`This is not HTML. It has ☃ though.`), "text/plain; charset=utf-8"},

{"XML", []byte("\n<?xml!"), "text/xml; charset=utf-8"},

// Image types.
{"Windows icon", []byte("\x00\x00\x01\x00"), "image/x-icon"},
{"Windows cursor", []byte("\x00\x00\x02\x00"), "image/x-icon"},
{"BMP image", []byte("BM..."), "image/bmp"},
{"GIF 87a", []byte(`GIF87a`), "image/gif"},
{"GIF 89a", []byte(`GIF89a...`), "image/gif"},
{"WEBP image", []byte("RIFF\x00\x00\x00\x00WEBPVP"), "image/webp"},
{"PNG image", []byte("\x89PNG\x0D\x0A\x1A\x0A"), "image/png"},
{"JPEG image", []byte("\xFF\xD8\xFF"), "image/jpeg"},

// Audio types.
{"MIDI audio", []byte("MThd\x00\x00\x00\x06\x00\x01"), "audio/midi"},
{"MP3 audio/MPEG audio", []byte("ID3\x03\x00\x00\x00\x00\x0f"), "audio/mpeg"},
{"WAV audio #1", []byte("RIFFb\xb8\x00\x00WAVEfmt \x12\x00\x00\x00\x06"), "audio/wave"},
{"WAV audio #2", []byte("RIFF,\x00\x00\x00WAVEfmt \x12\x00\x00\x00\x06"), "audio/wave"},
{"AIFF audio #1", []byte("FORM\x00\x00\x00\x00AIFFCOMM\x00\x00\x00\x12\x00\x01\x00\x00\x57\x55\x00\x10\x40\x0d\xf3\x34"), "audio/aiff"},

{"OGG audio", []byte("OggS\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x7e\x46\x00\x00\x00\x00\x00\x00\x1f\xf6\xb4\xfc\x01\x1e\x01\x76\x6f\x72"), "application/ogg"},
// Near-miss signatures: these differ from "OggS\x00" in case or content
// and are expected to fall back to application/octet-stream.
{"Must not match OGG", []byte("owow\x00"), "application/octet-stream"},
{"Must not match OGG", []byte("oooS\x00"), "application/octet-stream"},
{"Must not match OGG", []byte("oggS\x00"), "application/octet-stream"},

// Video types.
{"MP4 video", []byte("\x00\x00\x00\x18ftypmp42\x00\x00\x00\x00mp42isom<\x06t\xbfmdat"), "video/mp4"},
{"AVI video #1", []byte("RIFF,O\n\x00AVI LISTÀ"), "video/avi"},
{"AVI video #2", []byte("RIFF,\n\x00\x00AVI LISTÀ"), "video/avi"},

// Font types.
// {"MS.FontObject", []byte("\x00\x00")},
{"TTF sample I", []byte("\x00\x01\x00\x00\x00\x17\x01\x00\x00\x04\x01\x60\x4f"), "font/ttf"},
{"TTF sample II", []byte("\x00\x01\x00\x00\x00\x0e\x00\x80\x00\x03\x00\x60\x46"), "font/ttf"},

{"OTTO sample I", []byte("\x4f\x54\x54\x4f\x00\x0e\x00\x80\x00\x03\x00\x60\x42\x41\x53\x45"), "font/otf"},

{"woff sample I", []byte("\x77\x4f\x46\x46\x00\x01\x00\x00\x00\x00\x30\x54\x00\x0d\x00\x00"), "font/woff"},
{"woff2 sample", []byte("\x77\x4f\x46\x32\x00\x01\x00\x00\x00"), "font/woff2"},
{"wasm sample", []byte("\x00\x61\x73\x6d\x01\x00"), "application/wasm"},

// Archive types
{"RAR v1.5-v4.0", []byte("Rar!\x1A\x07\x00"), "application/x-rar-compressed"},
{"RAR v5+", []byte("Rar!\x1A\x07\x01\x00"), "application/x-rar-compressed"},
// Same signatures with '!' replaced by a space; expected to be treated as
// generic binary data.
{"Incorrect RAR v1.5-v4.0", []byte("Rar \x1A\x07\x00"), "application/octet-stream"},
{"Incorrect RAR v5+", []byte("Rar \x1A\x07\x01\x00"), "application/octet-stream"},
}

// TestContentTypeDetect checks that the Content-Type chosen for a response
// does not depend on how the handler splits its writes.
//
// For every entry in sniffTests the handler writes the entry's data one byte
// per Write call and then writes testBody. The test asserts that the response
// has status 200, that its Content-Type equals the entry's contentType, and
// that Content-Encoding is "gzip" exactly when DefaultContentTypeFilter
// accepts that content type.
func TestContentTypeDetect(t *testing.T) {
	for _, tt := range sniffTests {
		t.Run(tt.desc, func(t *testing.T) {
			handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				w.WriteHeader(http.StatusOK)
				// Do one byte writes so detection cannot rely on the
				// first Write carrying the whole signature.
				for _, c := range tt.data {
					if _, err := w.Write([]byte{c}); err != nil {
						t.Errorf("writing signature byte: %v", err)
						return
					}
				}
				if _, err := w.Write(testBody); err != nil {
					t.Errorf("writing test body: %v", err)
				}
			})

			wrapper, err := NewWrapper()
			assertNil(t, err)

			req, err := http.NewRequest(http.MethodGet, "/whatever", nil)
			if err != nil {
				t.Fatalf("creating request: %v", err)
			}
			req.Header.Set("Accept-Encoding", "gzip")

			resp := httptest.NewRecorder()
			wrapper(handler).ServeHTTP(resp, req)
			res := resp.Result()
			defer res.Body.Close()

			assertEqual(t, 200, res.StatusCode)
			assertEqual(t, tt.contentType, res.Header.Get("Content-Type"))

			// Compression is expected only for content types the default
			// filter accepts.
			if DefaultContentTypeFilter(tt.contentType) {
				assertEqual(t, "gzip", res.Header.Get("Content-Encoding"))
			} else {
				assertNotEqual(t, "gzip", res.Header.Get("Content-Encoding"))
			}
		})
	}
}

// --------------------------------------------------------------------

// BenchmarkGzipHandler_S2k runs the shared benchmark helper with the
// arguments false, 2048 and gzip.DefaultCompression.
// NOTE(review): judging by the name, 2048 is presumably the payload size in
// bytes; confirm against the benchmark helper's signature.
func BenchmarkGzipHandler_S2k(b *testing.B) {
	benchmark(b, false, 2048, gzip.DefaultCompression)
}
Expand Down

0 comments on commit b7e9e8e

Please sign in to comment.