Skip to content

Commit

Permalink
Auto decompress gzip-encoding response bodies (#838)
Browse files Browse the repository at this point in the history
* Auto decompress gzip-encoding response bodies

Implements #256. If the content-encoding of a response is "gzip"
and the keyword argument `decompress === true`, then we'll
use CodecZlib.jl to decompress the response and set it as the response
body. Passing `decompress=false` will leave the response body as-is.
We also support `HTTP.decode(::Request, "gzip")` which will do
the decompression.

* fixes

* fix

* fix

* fix'

* fix
  • Loading branch information
quinnj authored May 28, 2022
1 parent 7a0fdf2 commit 6714dcb
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 28 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "0.9.17"

[deps]
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
IniFile = "83e8ac13-25f8-5344-8a64-a9f2b223428f"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Expand All @@ -15,8 +16,8 @@ Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"

[compat]
LoggingExtras = "0.5"
IniFile = "0.5"
LoggingExtras = "0.5"
MbedTLS = "0.6.8, 0.7, 1"
URIs = "1.3"
julia = "1.6"
Expand Down
4 changes: 2 additions & 2 deletions src/DebugRequest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ If `verbose` keyword arg is > 0, or the HTTP.jl global `DEBUG_LEVEL[]` is > 0,
then enable debug logging with verbosity `verbose` for the lifetime of the request.
"""
function debuglayer(handler)
return function(request; verbose::Int=0, kw...)
return function(request; verbose=0, kw...)
# if debugging, enable by wrapping request in custom logger logic
if verbose >= 0 || DEBUG_LEVEL[] >= 0
if verbose > 0 || DEBUG_LEVEL[] > 0
LoggingExtras.withlevel(Logging.Debug; verbosity=verbose) do
handler(request; verbose=verbose, kw...)
end
Expand Down
2 changes: 1 addition & 1 deletion src/HTTP.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ include("access_log.jl")

include("Pairs.jl") ;using .Pairs
include("IOExtras.jl") ;using .IOExtras
include("Strings.jl")
include("Strings.jl") ;using .Strings
include("sniff.jl")
include("multipart.jl")
include("Parsers.jl") ;import .Parsers: Headers, Header,
Expand Down
6 changes: 3 additions & 3 deletions src/Messages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,12 @@ export Message, Request, Response,
readchunksize,
writeheaders, writestartline,
bodylength, unknown_length,
payload, statustext
payload, decode, statustext

import ..HTTP

using ..URIs
using CodecZlib
using ..Pairs
using ..IOExtras
using ..Parsers
Expand Down Expand Up @@ -483,9 +484,8 @@ payload(m::Message, ::Type{String}) =

# Decode the body of message `m` according to `encoding` and return the bytes.
#
# - `encoding == "gzip"`: returns `transcode(GzipDecompressor, m.body)`.
# - any other value: logs a warning and returns `m.body` unchanged.
#
# The declared return type converts the result to `Vector{UInt8}`.
# NOTE(review): `GzipDecompressor` presumably comes from CodecZlib — confirm the
# enclosing module brings it into scope.
function decode(m::Message, encoding::String)::Vector{UInt8}
if encoding == "gzip"
# Use https://github.com/bicycle1885/TranscodingStreams.jl ?
return transcode(GzipDecompressor, m.body)
end
# Fallback for every encoding other than "gzip": warn, then hand the body back as-is.
# NOTE(review): the message mentions Transfer-Encoding; confirm that wording is
# still what callers should see for an unsupported `encoding` value.
@warn "Decoding of HTTP Transfer-Encoding is not implemented yet!"
return m.body
end

Expand Down
16 changes: 2 additions & 14 deletions src/Parsers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ have field names compatible with those expected by the `parse_status_line!` and
"""
module Parsers

import ..access_threaded
import ..access_threaded, ..iso8859_1_to_utf8

export Header, Headers,
find_end_of_header, find_end_of_chunk_size, find_end_of_trailer,
Expand Down Expand Up @@ -222,19 +222,7 @@ function parse_header_field(bytes::SubString{String})::Tuple{Header,SubString{St
# these from latin-1 => utf-8 and then try to parse.
if !isvalid(bytes)
@warn "malformed HTTP header detected; attempting to re-encode from Latin-1 to UTF8"
rawbytes = codeunits(bytes)
buf = Base.StringVector(length(rawbytes) + count((0x80), rawbytes))
i = 0
for byte in rawbytes
if byte 0x80
buf[i += 1] = 0xc0 | (byte >> 6)
buf[i += 1] = 0x80 | (byte & 0x3f)
else
buf[i += 1] = byte
end
end
bytes = SubString(String(buf))
!isvalid(bytes) && @goto error
bytes = SubString(iso8859_1_to_utf8(codeunits(bytes)))
end

# First look for: field-name ":" field-value
Expand Down
11 changes: 7 additions & 4 deletions src/StreamRequest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import ..ConnectionPool
using ..MessageRequest
import ..RedirectRequest: nredirects
import ..sprintcompact
using LoggingExtras
using LoggingExtras, CodecZlib

export streamlayer

Expand All @@ -22,7 +22,7 @@ immediately so that the transmission can be aborted if the `Response` status
indicates that the server does not wish to receive the message body.
[RFC7230 6.5](https://tools.ietf.org/html/rfc7230#section-6.5).
"""
function streamlayer(stream::Stream; iofunction=nothing, redirect_limit::Int=3, kw...)::Response
function streamlayer(stream::Stream; iofunction=nothing, redirect_limit::Int=3, decompress::Bool=true, kw...)::Response
response = stream.message
req = response.request
io = stream.stream
Expand All @@ -49,7 +49,7 @@ function streamlayer(stream::Stream; iofunction=nothing, redirect_limit::Int=3,
end
@debugv 2 "client startread"
startread(stream)
readbody(stream, response, redirect_limit == nredirects(req))
readbody(stream, response, redirect_limit == nredirects(req), decompress)
else
iofunction(stream)
end
Expand Down Expand Up @@ -103,7 +103,10 @@ end
writechunk(stream, body::IO) = writebodystream(stream, body)
writechunk(stream, body) = write(stream, body)

function readbody(stream::Stream, res::Response, redirectlimitreached)
function readbody(stream::Stream, res::Response, redirectlimitreached, decompress)
if decompress && header(res, "Content-Encoding") == "gzip"
stream = GzipDecompressorStream(stream)
end
if isbytes(res.body)
res.body = read(stream)
else
Expand Down
6 changes: 3 additions & 3 deletions src/Strings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ tocameldash(s::AbstractString) = tocameldash(String(s))
Convert from ISO8859_1 to UTF8.
"""
function iso8859_1_to_utf8(bytes::Vector{UInt8})
function iso8859_1_to_utf8(bytes::AbstractVector{UInt8})
io = IOBuffer()
for b in bytes
if b < 0x80
write(io, b)
else
write(io, 0xc0 | b >> 6)
write(io, 0x80 | b & 0x3f)
write(io, 0xc0 | (b >> 6))
write(io, 0x80 | (b & 0x3f))
end
end
return String(take!(io))
Expand Down
11 changes: 11 additions & 0 deletions test/client.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,17 @@ end
@test status(HTTP.patch("$sch://httpbin.org/patch")) == 200
end

# Exercises gzip response handling against the httpbin.org `/gzip` endpoint.
# These tests issue real HTTP requests; `sch` (the URL scheme) is defined outside this block.
@testset "decompress" begin
# No `decompress` keyword passed: the body is expected to be plain ASCII text.
r = HTTP.get("$sch://httpbin.org/gzip")
@test status(r) == 200
@test isascii(String(r.body))
# With `decompress=false` the body is expected to be non-ASCII (still compressed).
r = HTTP.get("$sch://httpbin.org/gzip"; decompress=false)
@test status(r) == 200
@test !isascii(String(r.body))
# Fetch the raw body again and decode it explicitly via `HTTP.decode`.
# NOTE(review): the request is presumably repeated because `String(r.body)` above
# takes ownership of the byte vector and leaves `r.body` empty — confirm.
r = HTTP.get("$sch://httpbin.org/gzip"; decompress=false)
@test isascii(String(HTTP.decode(r, "gzip")))
end

@testset "ASync Client Requests" begin
@test status(fetch(@async HTTP.get("$sch://httpbin.org/ip"))) == 200
@test status(HTTP.get("$sch://httpbin.org/encoding/utf8")) == 200
Expand Down

0 comments on commit 6714dcb

Please sign in to comment.