From aece31d7ea7d09d4461d72ab7d7ceeca97714062 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 18:43:15 +0900 Subject: [PATCH 01/12] add Base64 module --- stdlib/Base64/src/Base64.jl | 394 +++++++++++++++++++++++++++++++++ stdlib/Base64/test/runtests.jl | 63 ++++++ 2 files changed, 457 insertions(+) create mode 100644 stdlib/Base64/src/Base64.jl create mode 100644 stdlib/Base64/test/runtests.jl diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl new file mode 100644 index 0000000000000..694ceb5901916 --- /dev/null +++ b/stdlib/Base64/src/Base64.jl @@ -0,0 +1,394 @@ +module Base64 + +export + Base64EncodePipe, + base64encode, + Base64DecodePipe, + base64decode + +# Data buffer for pipes. +mutable struct Buffer + data::Vector{UInt8} + ptr::Ptr{UInt8} + size::Int + + function Buffer(bufsize) + data = Vector{UInt8}(bufsize) + return new(data, pointer(data), 0) + end +end + +Base.empty!(buffer::Buffer) = buffer.size = 0 +Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i) +Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i) +Base.endof(buffer::Buffer) = buffer.size +Base.pointer(buffer::Buffer) = buffer.ptr +capacity(buffer::Buffer) = Int(pointer(buffer.data, endof(buffer.data) + 1) - buffer.ptr) + +function consumed!(buffer::Buffer, n::Integer) + @assert n ≤ buffer.size + buffer.ptr += n + buffer.size -= n +end + +function read_to_buffer(io::IO, buffer::Buffer) + offset = buffer.ptr - pointer(buffer.data) + copy!(buffer.data, 1, buffer.data, offset, buffer.size) + buffer.ptr = pointer(buffer.data) + buffer.size + if !eof(io) + n = min(nb_available(io), capacity(buffer) - buffer.size) + unsafe_read(io, buffer.ptr + buffer.size, n) + buffer.size += n + end + return +end + +const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] +encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] +encodepadding() = UInt8('=') + +const BASE64_CODE_END = 0x40 +const BASE64_CODE_PAD 
= 0x41 +const BASE64_CODE_IGN = 0x42 +const BASE64_DECODE = fill(BASE64_CODE_IGN, 256) +for (i, c) in enumerate(BASE64_ENCODE) + BASE64_DECODE[Int(c)+1] = UInt8(i - 1) +end +BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD +decode(x::UInt8) = BASE64_DECODE[x + 1] + + +# Encoder +# ------- + +struct Base64EncodePipe <: IO + io::IO + buffer::Buffer + + function Base64EncodePipe(io::IO) + # The buffer size must be at least 3. + buffer = Buffer(512) + pipe = new(io, buffer) + finalizer(buffer, _ -> close(pipe)) + return pipe + end +end + +function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::Int + buffer = pipe.buffer + m = buffer.size + b1, b2, b3, k = loadtriplet!(buffer, ptr, n) + @assert k ≥ m + p = ptr + k - m + if k < 3 + if k == 1 + buffer[1] = b1 + buffer.size = 1 + elseif k == 2 + buffer[1] = b1 + buffer[2] = b2 + buffer.size = 2 + end + return p - ptr + end + @assert buffer.size == 0 + + capacity = length(buffer.data) + i = 0 + p_end = ptr + n + while true + buffer[i+1] = encode(b1 >> 2 ) + buffer[i+2] = encode(b1 << 4 | b2 >> 4) + buffer[i+3] = encode(b2 << 2 | b3 >> 6) + buffer[i+4] = encode( b3 ) + i += 4 + if p + 2 < p_end + b1 = unsafe_load(p, 1) + b2 = unsafe_load(p, 2) + b3 = unsafe_load(p, 3) + p += 3 + else + break + end + if i + 4 > capacity + unsafe_write(pipe.io, pointer(buffer), i) + i = 0 + end + end + if i > 0 + unsafe_write(pipe.io, pointer(buffer), i) + end + + while p < p_end + buffer[buffer.size+=1] = unsafe_load(p, 1) + p += 1 + end + return p - ptr +end + +function Base.write(pipe::Base64EncodePipe, x::UInt8) + buffer = pipe.buffer + buffer[buffer.size+=1] = x + if buffer.size == 3 + unsafe_write(pipe, C_NULL, 0) + end + return 1 +end + +function Base.close(pipe::Base64EncodePipe) + b1, b2, b3, k = loadtriplet!(pipe.buffer, convert(Ptr{UInt8}, C_NULL), 0) + if k == 0 + # no leftover and padding + elseif k == 1 + write(pipe.io, + encode(b1 >> 2), + encode(b1 << 4), + encodepadding(), + encodepadding()) + 
elseif k == 2 + write(pipe.io, + encode( b1 >> 2), + encode(b1 << 4 | b2 >> 4), + encode(b2 << 2 ), + encodepadding()) + else + @assert k == 3 + write(pipe.io, + encode(b1 >> 2 ), + encode(b1 << 4 | b2 >> 4), + encode(b2 << 2 | b3 >> 6), + encode( b3 )) + end + return nothing +end + +# Load three bytes from buffer and ptr. +function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::Integer) + b1 = b2 = b3 = 0x00 + if buffer.size == 0 + if n == 0 + k = 0 + elseif n == 1 + b1 = unsafe_load(ptr, 1) + k = 1 + elseif n == 2 + b1 = unsafe_load(ptr, 1) + b2 = unsafe_load(ptr, 2) + k = 2 + else + b1 = unsafe_load(ptr, 1) + b2 = unsafe_load(ptr, 2) + b3 = unsafe_load(ptr, 3) + k = 3 + end + elseif buffer.size == 1 + b1 = buffer[1] + if n == 0 + k = 1 + elseif n == 1 + b2 = unsafe_load(ptr, 1) + k = 2 + else + b2 = unsafe_load(ptr, 1) + b3 = unsafe_load(ptr, 2) + k = 3 + end + elseif buffer.size == 2 + b1 = buffer[1] + b2 = buffer[2] + if n == 0 + k = 2 + else + b3 = unsafe_load(ptr, 1) + k = 3 + end + else + @assert buffer.size == 3 + b1 = buffer[1] + b2 = buffer[2] + b3 = buffer[3] + k = 3 + end + empty!(buffer) + return b1, b2, b3, k +end + +function base64encode(f::Function, args...) + s = IOBuffer() + b = Base64EncodePipe(s) + f(b, args...) + close(b) + return String(take!(s)) +end +base64encode(args...) = base64encode(write, args...) 
+ + +# Decoder +# ------- + +struct Base64DecodePipe <: IO + io::IO + buffer::Buffer + rest::Vector{UInt} + + function Base64DecodePipe(io::IO) + buffer = Buffer(512) + pipe = new(io, buffer, UInt8[]) + finalizer(buffer, _ -> close(pipe)) + return pipe + end +end + +function Base.unsafe_read(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) + p = read_avail(pipe, ptr, n) + if p < ptr + n + throw(EOFError()) + end + return nothing +end + +function read_avail(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) + p = ptr + p_end = ptr + n + while !isempty(pipe.rest) && p < p_end + unsafe_store!(p, shift!(pipe.rest)) + p += 1 + end + + buffer = pipe.buffer + i = 0 + b1 = b2 = b3 = b4 = BASE64_CODE_IGN + while true + if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 && p + 2 < p_end + # fast path to decode + unsafe_store!(p , b1 << 2 | b2 >> 4) + unsafe_store!(p + 1, b2 << 4 | b3 >> 2) + unsafe_store!(p + 2, b3 << 6 | b4 ) + p += 3 + else + i, p, finished = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest) + if finished + break + end + end + if p < p_end + if i + 4 ≤ endof(buffer) + b1 = decode(buffer[i+1]) + b2 = decode(buffer[i+2]) + b3 = decode(buffer[i+3]) + b4 = decode(buffer[i+4]) + i += 4 + else + consumed!(buffer, i) + read_to_buffer(pipe.io, buffer) + i = 0 + b1 = b2 = b3 = b4 = BASE64_CODE_IGN + end + else + break + end + end + consumed!(buffer, i) + + return p +end + +function Base.read(pipe::Base64DecodePipe, ::Type{UInt8}) + if isempty(pipe.rest) + unsafe_read(pipe, convert(Ptr{UInt8}, C_NULL), 0) + if isempty(pipe.rest) + throw(EOFError()) + end + end + return shift!(pipe.rest) +end + +function Base.readbytes!(pipe::Base64DecodePipe, data::AbstractVector{UInt8}, nb::Integer=length(data)) + filled::Int = 0 + while filled < nb && !eof(pipe) + if length(data) == filled + resize!(data, min(length(data) * 2, nb)) + end + p = pointer(data, filled + 1) + p_end = read_avail(pipe, p, UInt(min(length(data), nb) - filled)) + filled += p_end - p 
+ end + resize!(data, filled) + return filled +end + +Base.eof(pipe::Base64DecodePipe) = isempty(pipe.rest) && eof(pipe.io) +Base.close(pipe::Base64DecodePipe) = nothing + +# Decode data from (b1, b2, b3, b5, buffer, input) into (ptr, rest). +function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) + # Skip ignore code. + while true + if b1 == BASE64_CODE_IGN + b1, b2, b3 = b2, b3, b4 + elseif b2 == BASE64_CODE_IGN + b2, b3 = b3, b4 + elseif b3 == BASE64_CODE_IGN + b3 = b4 + elseif b4 == BASE64_CODE_IGN + # pass + else + break + end + if i + 1 ≤ endof(buffer) + b4 = decode(buffer[i+=1]) + elseif !eof(input) + b4 = decode(read(input, UInt8)) + else + b4 = BASE64_CODE_END + break + end + end + + # Check the decoded quadruplet. + k = 0 + finished = false + if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 + # pass + k = 3 + elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD + b4 = 0x00 + k = 2 + elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD + b3 = b4 = 0x00 + k = 1 + elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END + b1 = b2 = b3 = b4 = 0x00 + finished = true + else + throw(ArgumentError("malformed base64 sequence")) + end + + # Write output. + p = ptr + p_end = ptr + n + function output(b) + if p < p_end + unsafe_store!(p, b) + p += 1 + else + push!(rest, b) + end + end + k ≥ 1 && output(b1 << 2 | b2 >> 4) + k ≥ 2 && output(b2 << 4 | b3 >> 2) + k ≥ 3 && output(b3 << 6 | b4 ) + + return i, p, finished +end + +function base64decode(s) + b = IOBuffer(s) + try + return read(Base64DecodePipe(b)) + finally + close(b) + end +end + +end diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl new file mode 100644 index 0000000000000..4c2b38b70efc3 --- /dev/null +++ b/stdlib/Base64/test/runtests.jl @@ -0,0 +1,63 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +using Test +import Base64: + Base64EncodePipe, + base64encode, + Base64DecodePipe, + base64decode + +const inputText = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure." +const encodedMaxLine76 = """ +TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz +IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg +dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu +dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo +ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=""" + +@testset "Examples" begin + # Encode and decode + fname = tempname() + open(fname, "w") do f + opipe = Base64EncodePipe(f) + write(opipe,inputText) + @test close(opipe) === nothing + end + + open(fname, "r") do f + ipipe = Base64DecodePipe(f) + @test read(ipipe, String) == inputText + @test close(ipipe) === nothing + end + rm(fname) + + # Encode to string and decode + @test String(base64decode(base64encode(inputText))) == inputText + + # Decode with max line chars = 76 and padding + ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76)) + @test read(ipipe, String) == inputText + + # Decode with max line chars = 76 and no padding + #ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-1])) + #@test read(ipipe, String) == inputText + + # Decode with two padding characters ("==") + ipipe = Base64DecodePipe(IOBuffer(string(encodedMaxLine76[1:end-2],"=="))) + @test read(ipipe, String) == inputText[1:end-1] + + # Test incorrect format + ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-3])) + @test_throws ArgumentError read(ipipe, String) + + # issue #21314 + @test base64decode(chomp("test")) == base64decode("test") +end + +@testset 
"Random data" begin + mt = MersenneTwister(1234) + for _ in 1:1000 + data = rand(mt, UInt8, rand(0:300)) + @test hash(base64decode(base64encode(data))) == hash(data) + end +end From 61cd43c329e0d8f864b4a3d8742fb74ebb4b02be Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 18:48:18 +0900 Subject: [PATCH 02/12] separate files --- stdlib/Base64/src/Base64.jl | 389 +----------------------------------- stdlib/Base64/src/buffer.jl | 38 ++++ stdlib/Base64/src/decode.jl | 176 ++++++++++++++++ stdlib/Base64/src/encode.jl | 166 +++++++++++++++ 4 files changed, 385 insertions(+), 384 deletions(-) create mode 100644 stdlib/Base64/src/buffer.jl create mode 100644 stdlib/Base64/src/decode.jl create mode 100644 stdlib/Base64/src/encode.jl diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index 694ceb5901916..5c649a802e0f3 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -1,3 +1,5 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + module Base64 export @@ -6,389 +8,8 @@ export Base64DecodePipe, base64decode -# Data buffer for pipes. 
-mutable struct Buffer - data::Vector{UInt8} - ptr::Ptr{UInt8} - size::Int - - function Buffer(bufsize) - data = Vector{UInt8}(bufsize) - return new(data, pointer(data), 0) - end -end - -Base.empty!(buffer::Buffer) = buffer.size = 0 -Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i) -Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i) -Base.endof(buffer::Buffer) = buffer.size -Base.pointer(buffer::Buffer) = buffer.ptr -capacity(buffer::Buffer) = Int(pointer(buffer.data, endof(buffer.data) + 1) - buffer.ptr) - -function consumed!(buffer::Buffer, n::Integer) - @assert n ≤ buffer.size - buffer.ptr += n - buffer.size -= n -end - -function read_to_buffer(io::IO, buffer::Buffer) - offset = buffer.ptr - pointer(buffer.data) - copy!(buffer.data, 1, buffer.data, offset, buffer.size) - buffer.ptr = pointer(buffer.data) + buffer.size - if !eof(io) - n = min(nb_available(io), capacity(buffer) - buffer.size) - unsafe_read(io, buffer.ptr + buffer.size, n) - buffer.size += n - end - return -end - -const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] -encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] -encodepadding() = UInt8('=') - -const BASE64_CODE_END = 0x40 -const BASE64_CODE_PAD = 0x41 -const BASE64_CODE_IGN = 0x42 -const BASE64_DECODE = fill(BASE64_CODE_IGN, 256) -for (i, c) in enumerate(BASE64_ENCODE) - BASE64_DECODE[Int(c)+1] = UInt8(i - 1) -end -BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD -decode(x::UInt8) = BASE64_DECODE[x + 1] - - -# Encoder -# ------- - -struct Base64EncodePipe <: IO - io::IO - buffer::Buffer - - function Base64EncodePipe(io::IO) - # The buffer size must be at least 3. 
- buffer = Buffer(512) - pipe = new(io, buffer) - finalizer(buffer, _ -> close(pipe)) - return pipe - end -end - -function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::Int - buffer = pipe.buffer - m = buffer.size - b1, b2, b3, k = loadtriplet!(buffer, ptr, n) - @assert k ≥ m - p = ptr + k - m - if k < 3 - if k == 1 - buffer[1] = b1 - buffer.size = 1 - elseif k == 2 - buffer[1] = b1 - buffer[2] = b2 - buffer.size = 2 - end - return p - ptr - end - @assert buffer.size == 0 - - capacity = length(buffer.data) - i = 0 - p_end = ptr + n - while true - buffer[i+1] = encode(b1 >> 2 ) - buffer[i+2] = encode(b1 << 4 | b2 >> 4) - buffer[i+3] = encode(b2 << 2 | b3 >> 6) - buffer[i+4] = encode( b3 ) - i += 4 - if p + 2 < p_end - b1 = unsafe_load(p, 1) - b2 = unsafe_load(p, 2) - b3 = unsafe_load(p, 3) - p += 3 - else - break - end - if i + 4 > capacity - unsafe_write(pipe.io, pointer(buffer), i) - i = 0 - end - end - if i > 0 - unsafe_write(pipe.io, pointer(buffer), i) - end - - while p < p_end - buffer[buffer.size+=1] = unsafe_load(p, 1) - p += 1 - end - return p - ptr -end - -function Base.write(pipe::Base64EncodePipe, x::UInt8) - buffer = pipe.buffer - buffer[buffer.size+=1] = x - if buffer.size == 3 - unsafe_write(pipe, C_NULL, 0) - end - return 1 -end - -function Base.close(pipe::Base64EncodePipe) - b1, b2, b3, k = loadtriplet!(pipe.buffer, convert(Ptr{UInt8}, C_NULL), 0) - if k == 0 - # no leftover and padding - elseif k == 1 - write(pipe.io, - encode(b1 >> 2), - encode(b1 << 4), - encodepadding(), - encodepadding()) - elseif k == 2 - write(pipe.io, - encode( b1 >> 2), - encode(b1 << 4 | b2 >> 4), - encode(b2 << 2 ), - encodepadding()) - else - @assert k == 3 - write(pipe.io, - encode(b1 >> 2 ), - encode(b1 << 4 | b2 >> 4), - encode(b2 << 2 | b3 >> 6), - encode( b3 )) - end - return nothing -end - -# Load three bytes from buffer and ptr. 
-function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::Integer) - b1 = b2 = b3 = 0x00 - if buffer.size == 0 - if n == 0 - k = 0 - elseif n == 1 - b1 = unsafe_load(ptr, 1) - k = 1 - elseif n == 2 - b1 = unsafe_load(ptr, 1) - b2 = unsafe_load(ptr, 2) - k = 2 - else - b1 = unsafe_load(ptr, 1) - b2 = unsafe_load(ptr, 2) - b3 = unsafe_load(ptr, 3) - k = 3 - end - elseif buffer.size == 1 - b1 = buffer[1] - if n == 0 - k = 1 - elseif n == 1 - b2 = unsafe_load(ptr, 1) - k = 2 - else - b2 = unsafe_load(ptr, 1) - b3 = unsafe_load(ptr, 2) - k = 3 - end - elseif buffer.size == 2 - b1 = buffer[1] - b2 = buffer[2] - if n == 0 - k = 2 - else - b3 = unsafe_load(ptr, 1) - k = 3 - end - else - @assert buffer.size == 3 - b1 = buffer[1] - b2 = buffer[2] - b3 = buffer[3] - k = 3 - end - empty!(buffer) - return b1, b2, b3, k -end - -function base64encode(f::Function, args...) - s = IOBuffer() - b = Base64EncodePipe(s) - f(b, args...) - close(b) - return String(take!(s)) -end -base64encode(args...) = base64encode(write, args...) 
- - -# Decoder -# ------- - -struct Base64DecodePipe <: IO - io::IO - buffer::Buffer - rest::Vector{UInt} - - function Base64DecodePipe(io::IO) - buffer = Buffer(512) - pipe = new(io, buffer, UInt8[]) - finalizer(buffer, _ -> close(pipe)) - return pipe - end -end - -function Base.unsafe_read(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) - p = read_avail(pipe, ptr, n) - if p < ptr + n - throw(EOFError()) - end - return nothing -end - -function read_avail(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) - p = ptr - p_end = ptr + n - while !isempty(pipe.rest) && p < p_end - unsafe_store!(p, shift!(pipe.rest)) - p += 1 - end - - buffer = pipe.buffer - i = 0 - b1 = b2 = b3 = b4 = BASE64_CODE_IGN - while true - if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 && p + 2 < p_end - # fast path to decode - unsafe_store!(p , b1 << 2 | b2 >> 4) - unsafe_store!(p + 1, b2 << 4 | b3 >> 2) - unsafe_store!(p + 2, b3 << 6 | b4 ) - p += 3 - else - i, p, finished = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest) - if finished - break - end - end - if p < p_end - if i + 4 ≤ endof(buffer) - b1 = decode(buffer[i+1]) - b2 = decode(buffer[i+2]) - b3 = decode(buffer[i+3]) - b4 = decode(buffer[i+4]) - i += 4 - else - consumed!(buffer, i) - read_to_buffer(pipe.io, buffer) - i = 0 - b1 = b2 = b3 = b4 = BASE64_CODE_IGN - end - else - break - end - end - consumed!(buffer, i) - - return p -end - -function Base.read(pipe::Base64DecodePipe, ::Type{UInt8}) - if isempty(pipe.rest) - unsafe_read(pipe, convert(Ptr{UInt8}, C_NULL), 0) - if isempty(pipe.rest) - throw(EOFError()) - end - end - return shift!(pipe.rest) -end - -function Base.readbytes!(pipe::Base64DecodePipe, data::AbstractVector{UInt8}, nb::Integer=length(data)) - filled::Int = 0 - while filled < nb && !eof(pipe) - if length(data) == filled - resize!(data, min(length(data) * 2, nb)) - end - p = pointer(data, filled + 1) - p_end = read_avail(pipe, p, UInt(min(length(data), nb) - filled)) - filled += p_end - p 
- end - resize!(data, filled) - return filled -end - -Base.eof(pipe::Base64DecodePipe) = isempty(pipe.rest) && eof(pipe.io) -Base.close(pipe::Base64DecodePipe) = nothing - -# Decode data from (b1, b2, b3, b5, buffer, input) into (ptr, rest). -function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) - # Skip ignore code. - while true - if b1 == BASE64_CODE_IGN - b1, b2, b3 = b2, b3, b4 - elseif b2 == BASE64_CODE_IGN - b2, b3 = b3, b4 - elseif b3 == BASE64_CODE_IGN - b3 = b4 - elseif b4 == BASE64_CODE_IGN - # pass - else - break - end - if i + 1 ≤ endof(buffer) - b4 = decode(buffer[i+=1]) - elseif !eof(input) - b4 = decode(read(input, UInt8)) - else - b4 = BASE64_CODE_END - break - end - end - - # Check the decoded quadruplet. - k = 0 - finished = false - if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 - # pass - k = 3 - elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD - b4 = 0x00 - k = 2 - elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD - b3 = b4 = 0x00 - k = 1 - elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END - b1 = b2 = b3 = b4 = 0x00 - finished = true - else - throw(ArgumentError("malformed base64 sequence")) - end - - # Write output. - p = ptr - p_end = ptr + n - function output(b) - if p < p_end - unsafe_store!(p, b) - p += 1 - else - push!(rest, b) - end - end - k ≥ 1 && output(b1 << 2 | b2 >> 4) - k ≥ 2 && output(b2 << 4 | b3 >> 2) - k ≥ 3 && output(b3 << 6 | b4 ) - - return i, p, finished -end - -function base64decode(s) - b = IOBuffer(s) - try - return read(Base64DecodePipe(b)) - finally - close(b) - end -end +include("buffer.jl") +include("encode.jl") +include("decode.jl") end diff --git a/stdlib/Base64/src/buffer.jl b/stdlib/Base64/src/buffer.jl new file mode 100644 index 0000000000000..5e1fff8756f2c --- /dev/null +++ b/stdlib/Base64/src/buffer.jl @@ -0,0 +1,38 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +# Data buffer for pipes. 
+mutable struct Buffer + data::Vector{UInt8} + ptr::Ptr{UInt8} + size::Int + + function Buffer(bufsize) + data = Vector{UInt8}(bufsize) + return new(data, pointer(data), 0) + end +end + +Base.empty!(buffer::Buffer) = buffer.size = 0 +Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i) +Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i) +Base.endof(buffer::Buffer) = buffer.size +Base.pointer(buffer::Buffer) = buffer.ptr +capacity(buffer::Buffer) = Int(pointer(buffer.data, endof(buffer.data) + 1) - buffer.ptr) + +function consumed!(buffer::Buffer, n::Integer) + @assert n ≤ buffer.size + buffer.ptr += n + buffer.size -= n +end + +function read_to_buffer(io::IO, buffer::Buffer) + offset = buffer.ptr - pointer(buffer.data) + copy!(buffer.data, 1, buffer.data, offset, buffer.size) + buffer.ptr = pointer(buffer.data) + buffer.size + if !eof(io) + n = min(nb_available(io), capacity(buffer) - buffer.size) + unsafe_read(io, buffer.ptr + buffer.size, n) + buffer.size += n + end + return +end diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl new file mode 100644 index 0000000000000..f9cb2a40ff650 --- /dev/null +++ b/stdlib/Base64/src/decode.jl @@ -0,0 +1,176 @@ +# This file is a part of Julia. 
License is MIT: https://julialang.org/license + +const BASE64_CODE_END = 0x40 +const BASE64_CODE_PAD = 0x41 +const BASE64_CODE_IGN = 0x42 +const BASE64_DECODE = fill(BASE64_CODE_IGN, 256) +for (i, c) in enumerate(BASE64_ENCODE) + BASE64_DECODE[Int(c)+1] = UInt8(i - 1) +end +BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD +decode(x::UInt8) = BASE64_DECODE[x + 1] + +struct Base64DecodePipe <: IO + io::IO + buffer::Buffer + rest::Vector{UInt} + + function Base64DecodePipe(io::IO) + buffer = Buffer(512) + pipe = new(io, buffer, UInt8[]) + finalizer(buffer, _ -> close(pipe)) + return pipe + end +end + +function Base.unsafe_read(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) + p = read_avail(pipe, ptr, n) + if p < ptr + n + throw(EOFError()) + end + return nothing +end + +function read_avail(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) + p = ptr + p_end = ptr + n + while !isempty(pipe.rest) && p < p_end + unsafe_store!(p, shift!(pipe.rest)) + p += 1 + end + + buffer = pipe.buffer + i = 0 + b1 = b2 = b3 = b4 = BASE64_CODE_IGN + while true + if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 && p + 2 < p_end + # fast path to decode + unsafe_store!(p , b1 << 2 | b2 >> 4) + unsafe_store!(p + 1, b2 << 4 | b3 >> 2) + unsafe_store!(p + 2, b3 << 6 | b4 ) + p += 3 + else + i, p, finished = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest) + if finished + break + end + end + if p < p_end + if i + 4 ≤ endof(buffer) + b1 = decode(buffer[i+1]) + b2 = decode(buffer[i+2]) + b3 = decode(buffer[i+3]) + b4 = decode(buffer[i+4]) + i += 4 + else + consumed!(buffer, i) + read_to_buffer(pipe.io, buffer) + i = 0 + b1 = b2 = b3 = b4 = BASE64_CODE_IGN + end + else + break + end + end + consumed!(buffer, i) + + return p +end + +function Base.read(pipe::Base64DecodePipe, ::Type{UInt8}) + if isempty(pipe.rest) + unsafe_read(pipe, convert(Ptr{UInt8}, C_NULL), 0) + if isempty(pipe.rest) + throw(EOFError()) + end + end + return shift!(pipe.rest) +end + +function 
Base.readbytes!(pipe::Base64DecodePipe, data::AbstractVector{UInt8}, nb::Integer=length(data)) + filled::Int = 0 + while filled < nb && !eof(pipe) + if length(data) == filled + resize!(data, min(length(data) * 2, nb)) + end + p = pointer(data, filled + 1) + p_end = read_avail(pipe, p, UInt(min(length(data), nb) - filled)) + filled += p_end - p + end + resize!(data, filled) + return filled +end + +Base.eof(pipe::Base64DecodePipe) = isempty(pipe.rest) && eof(pipe.io) +Base.close(pipe::Base64DecodePipe) = nothing + +# Decode data from (b1, b2, b3, b5, buffer, input) into (ptr, rest). +function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) + # Skip ignore code. + while true + if b1 == BASE64_CODE_IGN + b1, b2, b3 = b2, b3, b4 + elseif b2 == BASE64_CODE_IGN + b2, b3 = b3, b4 + elseif b3 == BASE64_CODE_IGN + b3 = b4 + elseif b4 == BASE64_CODE_IGN + # pass + else + break + end + if i + 1 ≤ endof(buffer) + b4 = decode(buffer[i+=1]) + elseif !eof(input) + b4 = decode(read(input, UInt8)) + else + b4 = BASE64_CODE_END + break + end + end + + # Check the decoded quadruplet. + k = 0 + finished = false + if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 + # pass + k = 3 + elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD + b4 = 0x00 + k = 2 + elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD + b3 = b4 = 0x00 + k = 1 + elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END + b1 = b2 = b3 = b4 = 0x00 + finished = true + else + throw(ArgumentError("malformed base64 sequence")) + end + + # Write output. 
+ p = ptr + p_end = ptr + n + function output(b) + if p < p_end + unsafe_store!(p, b) + p += 1 + else + push!(rest, b) + end + end + k ≥ 1 && output(b1 << 2 | b2 >> 4) + k ≥ 2 && output(b2 << 4 | b3 >> 2) + k ≥ 3 && output(b3 << 6 | b4 ) + + return i, p, finished +end + +function base64decode(s) + b = IOBuffer(s) + try + return read(Base64DecodePipe(b)) + finally + close(b) + end +end diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl new file mode 100644 index 0000000000000..0c1acec1022d6 --- /dev/null +++ b/stdlib/Base64/src/encode.jl @@ -0,0 +1,166 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] +encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] +encodepadding() = UInt8('=') + +struct Base64EncodePipe <: IO + io::IO + buffer::Buffer + + function Base64EncodePipe(io::IO) + # The buffer size must be at least 3. + buffer = Buffer(512) + pipe = new(io, buffer) + finalizer(buffer, _ -> close(pipe)) + return pipe + end +end + +function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::Int + buffer = pipe.buffer + m = buffer.size + b1, b2, b3, k = loadtriplet!(buffer, ptr, n) + @assert k ≥ m + p = ptr + k - m + if k < 3 + if k == 1 + buffer[1] = b1 + buffer.size = 1 + elseif k == 2 + buffer[1] = b1 + buffer[2] = b2 + buffer.size = 2 + end + return p - ptr + end + @assert buffer.size == 0 + + capacity = length(buffer.data) + i = 0 + p_end = ptr + n + while true + buffer[i+1] = encode(b1 >> 2 ) + buffer[i+2] = encode(b1 << 4 | b2 >> 4) + buffer[i+3] = encode(b2 << 2 | b3 >> 6) + buffer[i+4] = encode( b3 ) + i += 4 + if p + 2 < p_end + b1 = unsafe_load(p, 1) + b2 = unsafe_load(p, 2) + b3 = unsafe_load(p, 3) + p += 3 + else + break + end + if i + 4 > capacity + unsafe_write(pipe.io, pointer(buffer), i) + i = 0 + end + end + if i > 0 + unsafe_write(pipe.io, pointer(buffer), i) + end + + while p < p_end + 
buffer[buffer.size+=1] = unsafe_load(p, 1) + p += 1 + end + return p - ptr +end + +function Base.write(pipe::Base64EncodePipe, x::UInt8) + buffer = pipe.buffer + buffer[buffer.size+=1] = x + if buffer.size == 3 + unsafe_write(pipe, C_NULL, 0) + end + return 1 +end + +function Base.close(pipe::Base64EncodePipe) + b1, b2, b3, k = loadtriplet!(pipe.buffer, convert(Ptr{UInt8}, C_NULL), 0) + if k == 0 + # no leftover and padding + elseif k == 1 + write(pipe.io, + encode(b1 >> 2), + encode(b1 << 4), + encodepadding(), + encodepadding()) + elseif k == 2 + write(pipe.io, + encode( b1 >> 2), + encode(b1 << 4 | b2 >> 4), + encode(b2 << 2 ), + encodepadding()) + else + @assert k == 3 + write(pipe.io, + encode(b1 >> 2 ), + encode(b1 << 4 | b2 >> 4), + encode(b2 << 2 | b3 >> 6), + encode( b3 )) + end + return nothing +end + +# Load three bytes from buffer and ptr. +function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::Integer) + b1 = b2 = b3 = 0x00 + if buffer.size == 0 + if n == 0 + k = 0 + elseif n == 1 + b1 = unsafe_load(ptr, 1) + k = 1 + elseif n == 2 + b1 = unsafe_load(ptr, 1) + b2 = unsafe_load(ptr, 2) + k = 2 + else + b1 = unsafe_load(ptr, 1) + b2 = unsafe_load(ptr, 2) + b3 = unsafe_load(ptr, 3) + k = 3 + end + elseif buffer.size == 1 + b1 = buffer[1] + if n == 0 + k = 1 + elseif n == 1 + b2 = unsafe_load(ptr, 1) + k = 2 + else + b2 = unsafe_load(ptr, 1) + b3 = unsafe_load(ptr, 2) + k = 3 + end + elseif buffer.size == 2 + b1 = buffer[1] + b2 = buffer[2] + if n == 0 + k = 2 + else + b3 = unsafe_load(ptr, 1) + k = 3 + end + else + @assert buffer.size == 3 + b1 = buffer[1] + b2 = buffer[2] + b3 = buffer[3] + k = 3 + end + empty!(buffer) + return b1, b2, b3, k +end + +function base64encode(f::Function, args...) + s = IOBuffer() + b = Base64EncodePipe(s) + f(b, args...) + close(b) + return String(take!(s)) +end +base64encode(args...) = base64encode(write, args...) 
From 04037ee2b867ca9484e543602cedfa57033f5355 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 18:54:45 +0900 Subject: [PATCH 03/12] add docs --- stdlib/Base64/docs/src/index.md | 8 +++++++ stdlib/Base64/src/decode.jl | 42 +++++++++++++++++++++++++++++++++ stdlib/Base64/src/encode.jl | 38 +++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) create mode 100644 stdlib/Base64/docs/src/index.md diff --git a/stdlib/Base64/docs/src/index.md b/stdlib/Base64/docs/src/index.md new file mode 100644 index 0000000000000..b8434151bf022 --- /dev/null +++ b/stdlib/Base64/docs/src/index.md @@ -0,0 +1,8 @@ +# Base64 + +```@docs +Base64EncodePipe +base64encode +Base64DecodePipe +base64decode +``` diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index f9cb2a40ff650..1a48d1456a9e5 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -10,6 +10,26 @@ end BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD decode(x::UInt8) = BASE64_DECODE[x + 1] +""" + Base64DecodePipe(istream) + +Returns a new read-only I/O stream, which decodes base64-encoded data read from `istream`. + +# Examples +```jldoctest +julia> io = IOBuffer(); + +julia> iob64_decode = Base64DecodePipe(io); + +julia> write(io, "SGVsbG8h") +8 + +julia> seekstart(io); + +julia> String(read(iob64_decode)) +"Hello!" +``` +""" struct Base64DecodePipe <: IO io::IO buffer::Buffer @@ -166,6 +186,28 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) return i, p, finished end +""" + base64decode(string) + +Decodes the base64-encoded `string` and returns a `Vector{UInt8}` of the decoded bytes. + +See also [`base64encode`](@ref) + +# Examples +```jldoctest +julia> b = base64decode("SGVsbG8h") +6-element Array{UInt8,1}: + 0x48 + 0x65 + 0x6c + 0x6c + 0x6f + 0x21 + +julia> String(b) +"Hello!" 
+``` +""" function base64decode(s) b = IOBuffer(s) try diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index 0c1acec1022d6..bc1624e015d8c 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -4,6 +4,32 @@ const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] encodepadding() = UInt8('=') +""" + Base64EncodePipe(ostream) + +Returns a new write-only I/O stream, which converts any bytes written to it into +base64-encoded ASCII bytes written to `ostream`. +Calling [`close`](@ref) on the `Base64EncodePipe` stream +is necessary to complete the encoding (but does not close `ostream`). + +# Examples +```jldoctest +julia> io = IOBuffer(); + +julia> iob64_encode = Base64EncodePipe(io); + +julia> write(iob64_encode, "Hello!") +6 + +julia> close(iob64_encode); + +julia> str = String(take!(io)) +"SGVsbG8h" + +julia> String(base64decode(str)) +"Hello!" +``` +""" struct Base64EncodePipe <: IO io::IO buffer::Buffer @@ -156,6 +182,18 @@ function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::Integer) return b1, b2, b3, k end +""" + base64encode(writefunc, args...) + base64encode(args...) + +Given a [`write`](@ref)-like function `writefunc`, which takes an I/O stream as its first argument, +`base64encode(writefunc, args...)` calls `writefunc` to write `args...` to a base64-encoded +string, and returns the string. `base64encode(args...)` is equivalent to `base64encode(write, args...)`: +it converts its arguments into bytes using the standard [`write`](@ref) functions and returns the +base64-encoded string. + +See also [`base64decode`](@ref). +""" function base64encode(f::Function, args...) 
s = IOBuffer() b = Base64EncodePipe(s) From c4798ae1d34e378849277623b0866a5f6965a9f6 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 18:58:13 +0900 Subject: [PATCH 04/12] formatting --- stdlib/Base64/src/Base64.jl | 9 +++++++++ stdlib/Base64/src/decode.jl | 9 ++++++--- stdlib/Base64/src/encode.jl | 18 ++++++++++-------- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index 5c649a802e0f3..d6176e6e47e90 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -8,6 +8,15 @@ export Base64DecodePipe, base64decode +# Base64EncodePipe is a pipe-like IO object, which converts into base64 data +# sent to a stream. (You must close the pipe to complete the encode, separate +# from closing the target stream). We also have a function base64encode(f, +# args...) which works like sprint except that it produces base64-encoded data, +# along with base64encode(args...) which is equivalent to base64encode(write, +# args...), to return base64 strings. A Base64DecodePipe object can be used to +# decode base64-encoded data read from a stream, while function base64decode is +# useful for decoding strings. + include("buffer.jl") include("encode.jl") include("decode.jl") diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index 1a48d1456a9e5..e350c1909e438 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -1,5 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# Generate decode table. const BASE64_CODE_END = 0x40 const BASE64_CODE_PAD = 0x41 const BASE64_CODE_IGN = 0x42 @@ -13,7 +14,8 @@ decode(x::UInt8) = BASE64_DECODE[x + 1] """ Base64DecodePipe(istream) -Returns a new read-only I/O stream, which decodes base64-encoded data read from `istream`. +Returns a new read-only I/O stream, which decodes base64-encoded data read from +`istream`. 
# Examples ```jldoctest @@ -189,9 +191,10 @@ end """ base64decode(string) -Decodes the base64-encoded `string` and returns a `Vector{UInt8}` of the decoded bytes. +Decodes the base64-encoded `string` and returns a `Vector{UInt8}` of the decoded +bytes. -See also [`base64encode`](@ref) +See also [`base64encode`](@ref). # Examples ```jldoctest diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index bc1624e015d8c..585b34ca561ec 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -1,5 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# Generate encode table. const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] encodepadding() = UInt8('=') @@ -8,9 +9,9 @@ encodepadding() = UInt8('=') Base64EncodePipe(ostream) Returns a new write-only I/O stream, which converts any bytes written to it into -base64-encoded ASCII bytes written to `ostream`. -Calling [`close`](@ref) on the `Base64EncodePipe` stream -is necessary to complete the encoding (but does not close `ostream`). +base64-encoded ASCII bytes written to `ostream`. Calling [`close`](@ref) on the +`Base64EncodePipe` stream is necessary to complete the encoding (but does not +close `ostream`). # Examples ```jldoctest @@ -186,11 +187,12 @@ end base64encode(writefunc, args...) base64encode(args...) -Given a [`write`](@ref)-like function `writefunc`, which takes an I/O stream as its first argument, -`base64encode(writefunc, args...)` calls `writefunc` to write `args...` to a base64-encoded -string, and returns the string. `base64encode(args...)` is equivalent to `base64encode(write, args...)`: -it converts its arguments into bytes using the standard [`write`](@ref) functions and returns the -base64-encoded string. 
+Given a [`write`](@ref)-like function `writefunc`, which takes an I/O stream as +its first argument, `base64encode(writefunc, args...)` calls `writefunc` to +write `args...` to a base64-encoded string, and returns the string. +`base64encode(args...)` is equivalent to `base64encode(write, args...)`: it +converts its arguments into bytes using the standard [`write`](@ref) functions +and returns the base64-encoded string. See also [`base64decode`](@ref). """ From 0b39e7b1ece561e3130064f46fd5e2ee5cda3a8e Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 20:23:00 +0900 Subject: [PATCH 05/12] fix type --- stdlib/Base64/src/decode.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index e350c1909e438..7bdaca0bd23bd 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -35,7 +35,7 @@ julia> String(read(iob64_decode)) struct Base64DecodePipe <: IO io::IO buffer::Buffer - rest::Vector{UInt} + rest::Vector{UInt8} function Base64DecodePipe(io::IO) buffer = Buffer(512) From ae3415696dc54551e8809c317a36412ee18aac22 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 20:38:49 +0900 Subject: [PATCH 06/12] circumvent inference problem --- stdlib/Base64/src/decode.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index 7bdaca0bd23bd..bbbc6828b8293 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -171,8 +171,8 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) end # Write output. 
- p = ptr - p_end = ptr + n + p::Ptr{UInt8} = ptr + p_end::Ptr{UInt8} = ptr + n function output(b) if p < p_end unsafe_store!(p, b) From 80e85ec3e318a5af78174bf0a6769804dd02ca04 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 20:49:34 +0900 Subject: [PATCH 07/12] clean up --- stdlib/Base64/src/decode.jl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index bbbc6828b8293..267f5a451d971 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -46,14 +46,15 @@ struct Base64DecodePipe <: IO end function Base.unsafe_read(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) - p = read_avail(pipe, ptr, n) + p = read_until_end(pipe, ptr, n) if p < ptr + n throw(EOFError()) end return nothing end -function read_avail(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) +# Read and decode as much data as possible. +function read_until_end(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) p = ptr p_end = ptr + n while !isempty(pipe.rest) && p < p_end @@ -72,8 +73,8 @@ function read_avail(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt) unsafe_store!(p + 2, b3 << 6 | b4 ) p += 3 else - i, p, finished = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest) - if finished + i, p, ended = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest) + if ended break end end @@ -116,7 +117,7 @@ function Base.readbytes!(pipe::Base64DecodePipe, data::AbstractVector{UInt8}, nb resize!(data, min(length(data) * 2, nb)) end p = pointer(data, filled + 1) - p_end = read_avail(pipe, p, UInt(min(length(data), nb) - filled)) + p_end = read_until_end(pipe, p, UInt(min(length(data), nb) - filled)) filled += p_end - p end resize!(data, filled) @@ -153,7 +154,7 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) # Check the decoded quadruplet. 
k = 0 - finished = false + ended = false if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 # pass k = 3 @@ -165,14 +166,14 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) k = 1 elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END b1 = b2 = b3 = b4 = 0x00 - finished = true + ended = true else throw(ArgumentError("malformed base64 sequence")) end # Write output. p::Ptr{UInt8} = ptr - p_end::Ptr{UInt8} = ptr + n + p_end = ptr + n function output(b) if p < p_end unsafe_store!(p, b) @@ -185,7 +186,7 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) k ≥ 2 && output(b2 << 4 | b3 >> 2) k ≥ 3 && output(b3 << 6 | b4 ) - return i, p, finished + return i, p, ended end """ From 4dfe963adc2add04dc494ee8f314023b08c86fb6 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 21:43:09 +0900 Subject: [PATCH 08/12] fix --- stdlib/Base64/src/decode.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index 267f5a451d971..e79d9127895eb 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -154,7 +154,6 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) # Check the decoded quadruplet. 
k = 0 - ended = false if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 # pass k = 3 @@ -166,7 +165,6 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) k = 1 elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END b1 = b2 = b3 = b4 = 0x00 - ended = true else throw(ArgumentError("malformed base64 sequence")) end @@ -186,7 +184,7 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) k ≥ 2 && output(b2 << 4 | b3 >> 2) k ≥ 3 && output(b3 << 6 | b4 ) - return i, p, ended + return i, p, k == 0 end """ From 3e546f921c9f0223f53ea290dc69a4f4b916af47 Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 22:43:11 +0900 Subject: [PATCH 09/12] fix2 --- stdlib/Base64/src/decode.jl | 11 ++++------- stdlib/Base64/src/encode.jl | 13 ++++++------- stdlib/Base64/test/runtests.jl | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl index e79d9127895eb..ec1ae553a2682 100644 --- a/stdlib/Base64/src/decode.jl +++ b/stdlib/Base64/src/decode.jl @@ -9,12 +9,12 @@ for (i, c) in enumerate(BASE64_ENCODE) BASE64_DECODE[Int(c)+1] = UInt8(i - 1) end BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD -decode(x::UInt8) = BASE64_DECODE[x + 1] +decode(x::UInt8) = @inbounds return BASE64_DECODE[x + 1] """ Base64DecodePipe(istream) -Returns a new read-only I/O stream, which decodes base64-encoded data read from +Return a new read-only I/O stream, which decodes base64-encoded data read from `istream`. # Examples @@ -39,9 +39,7 @@ struct Base64DecodePipe <: IO function Base64DecodePipe(io::IO) buffer = Buffer(512) - pipe = new(io, buffer, UInt8[]) - finalizer(buffer, _ -> close(pipe)) - return pipe + return new(io, buffer, UInt8[]) end end @@ -155,7 +153,6 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest) # Check the decoded quadruplet. 
k = 0 if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 - # pass k = 3 elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD b4 = 0x00 @@ -190,7 +187,7 @@ end """ base64decode(string) -Decodes the base64-encoded `string` and returns a `Vector{UInt8}` of the decoded +Decode the base64-encoded `string` and return a `Vector{UInt8}` of the decoded bytes. See also [`base64encode`](@ref). diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl index 585b34ca561ec..37dda446cd525 100644 --- a/stdlib/Base64/src/encode.jl +++ b/stdlib/Base64/src/encode.jl @@ -2,13 +2,13 @@ # Generate encode table. const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']] -encode(x::UInt8) = BASE64_ENCODE[(x & 0x3f) + 1] +encode(x::UInt8) = @inbounds return BASE64_ENCODE[(x & 0x3f) + 1] encodepadding() = UInt8('=') """ Base64EncodePipe(ostream) -Returns a new write-only I/O stream, which converts any bytes written to it into +Return a new write-only I/O stream, which converts any bytes written to it into base64-encoded ASCII bytes written to `ostream`. Calling [`close`](@ref) on the `Base64EncodePipe` stream is necessary to complete the encoding (but does not close `ostream`). 
@@ -63,7 +63,6 @@ function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::In end @assert buffer.size == 0 - capacity = length(buffer.data) i = 0 p_end = ptr + n while true @@ -80,7 +79,7 @@ function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::In else break end - if i + 4 > capacity + if i + 4 > capacity(buffer) unsafe_write(pipe.io, pointer(buffer), i) i = 0 end @@ -90,7 +89,7 @@ function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::In end while p < p_end - buffer[buffer.size+=1] = unsafe_load(p, 1) + buffer[buffer.size+=1] = unsafe_load(p) p += 1 end return p - ptr @@ -106,7 +105,7 @@ function Base.write(pipe::Base64EncodePipe, x::UInt8) end function Base.close(pipe::Base64EncodePipe) - b1, b2, b3, k = loadtriplet!(pipe.buffer, convert(Ptr{UInt8}, C_NULL), 0) + b1, b2, b3, k = loadtriplet!(pipe.buffer, Ptr{UInt8}(C_NULL), UInt(0)) if k == 0 # no leftover and padding elseif k == 1 @@ -133,7 +132,7 @@ function Base.close(pipe::Base64EncodePipe) end # Load three bytes from buffer and ptr. -function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::Integer) +function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::UInt) b1 = b2 = b3 = 0x00 if buffer.size == 0 if n == 0 diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl index 4c2b38b70efc3..8eab331ee6b1b 100644 --- a/stdlib/Base64/test/runtests.jl +++ b/stdlib/Base64/test/runtests.jl @@ -31,6 +31,20 @@ ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=""" end rm(fname) + # Byte-by-byte encode and decode. 
+ buf = IOBuffer() + pipe = Base64EncodePipe(buf) + for char in inputText + write(pipe, UInt8(char)) + end + close(pipe) + pipe = Base64DecodePipe(IOBuffer(take!(buf))) + decoded = UInt8[] + while !eof(pipe) + push!(decoded, read(pipe, UInt8)) + end + @test String(decoded) == inputText + # Encode to string and decode @test String(base64decode(base64encode(inputText))) == inputText From a63435cfb27e4320ed83a4058e218a6cb1d0cdfe Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Tue, 24 Oct 2017 02:13:14 +0900 Subject: [PATCH 10/12] fix doc/make.jl --- doc/make.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/make.jl b/doc/make.jl index e08f8a8ce17b4..04946b8f75df2 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -25,11 +25,13 @@ if Sys.iswindows() cp_q("../stdlib/Test/docs/src/index.md", "src/stdlib/test.md") cp_q("../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md") cp_q("../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md") + cp_q("../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md") else symlink_q("../../../stdlib/DelimitedFiles/docs/src/index.md", "src/stdlib/delimitedfiles.md") symlink_q("../../../stdlib/Test/docs/src/index.md", "src/stdlib/test.md") symlink_q("../../../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md") symlink_q("../../../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md") + symlink_q("../../../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md") end const PAGES = [ @@ -133,11 +135,11 @@ const PAGES = [ ], ] -using DelimitedFiles, Test, Mmap, SharedArrays +using DelimitedFiles, Test, Mmap, SharedArrays, Base64 makedocs( build = joinpath(pwd(), "_build/html/en"), - modules = [Base, Core, BuildSysImg, DelimitedFiles, Test, Mmap, SharedArrays], + modules = [Base, Core, BuildSysImg, DelimitedFiles, Test, Mmap, SharedArrays, Base64], clean = false, doctest = "doctest" in ARGS, linkcheck = "linkcheck" in ARGS, From 9d69b43c63fa7c257a0412d9f6dbcfb654d94dca Mon Sep 17 
00:00:00 2001 From: Kenta Sato Date: Tue, 24 Oct 2017 02:31:48 +0900 Subject: [PATCH 11/12] temporarily unexport names from Base64 to build docs --- stdlib/Base64/docs/src/index.md | 8 ++++---- stdlib/Base64/src/Base64.jl | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/stdlib/Base64/docs/src/index.md b/stdlib/Base64/docs/src/index.md index b8434151bf022..4b4eb6e8cf571 100644 --- a/stdlib/Base64/docs/src/index.md +++ b/stdlib/Base64/docs/src/index.md @@ -1,8 +1,8 @@ # Base64 ```@docs -Base64EncodePipe -base64encode -Base64DecodePipe -base64decode +Base64.Base64EncodePipe +Base64.base64encode +Base64.Base64DecodePipe +Base64.base64decode ``` diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl index d6176e6e47e90..b065011ae36b0 100644 --- a/stdlib/Base64/src/Base64.jl +++ b/stdlib/Base64/src/Base64.jl @@ -2,11 +2,13 @@ module Base64 +#= export Base64EncodePipe, base64encode, Base64DecodePipe, base64decode +=# # Base64EncodePipe is a pipe-like IO object, which converts into base64 data # sent to a stream. (You must close the pipe to complete the encode, separate From 9d46c87278988f132386d1f359a9a7d0596f036e Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Tue, 24 Oct 2017 02:48:53 +0900 Subject: [PATCH 12/12] fix doc/make.jl --- doc/make.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/make.jl b/doc/make.jl index 04946b8f75df2..637eee981ec99 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -101,6 +101,7 @@ const PAGES = [ "stdlib/profile.md", "stdlib/stacktraces.md", "stdlib/simd-types.md", + "stdlib/base64.md", ], "Developer Documentation" => [ "devdocs/reflection.md",