From 4a606f10306feef419a226cf3a51cfe1000b029b Mon Sep 17 00:00:00 2001 From: Kenta Sato Date: Mon, 23 Oct 2017 18:43:15 +0900 Subject: [PATCH] add Base64 module --- doc/make.jl | 7 +- stdlib/Base64/docs/src/index.md | 8 ++ stdlib/Base64/src/Base64.jl | 26 ++++ stdlib/Base64/src/buffer.jl | 38 ++++++ stdlib/Base64/src/decode.jl | 217 ++++++++++++++++++++++++++++++++ stdlib/Base64/src/encode.jl | 205 ++++++++++++++++++++++++++++++ stdlib/Base64/test/runtests.jl | 77 ++++++++++++ 7 files changed, 576 insertions(+), 2 deletions(-) create mode 100644 stdlib/Base64/docs/src/index.md create mode 100644 stdlib/Base64/src/Base64.jl create mode 100644 stdlib/Base64/src/buffer.jl create mode 100644 stdlib/Base64/src/decode.jl create mode 100644 stdlib/Base64/src/encode.jl create mode 100644 stdlib/Base64/test/runtests.jl diff --git a/doc/make.jl b/doc/make.jl index 102ca3969301c..f4b4512770108 100644 --- a/doc/make.jl +++ b/doc/make.jl @@ -26,12 +26,14 @@ if Sys.iswindows() cp_q("../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md") cp_q("../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md") cp_q("../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md") + cp_q("../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md") else symlink_q("../../../stdlib/DelimitedFiles/docs/src/index.md", "src/stdlib/delimitedfiles.md") symlink_q("../../../stdlib/Test/docs/src/index.md", "src/stdlib/test.md") symlink_q("../../../stdlib/Mmap/docs/src/index.md", "src/stdlib/mmap.md") symlink_q("../../../stdlib/SharedArrays/docs/src/index.md", "src/stdlib/sharedarrays.md") symlink_q("../../../stdlib/Profile/docs/src/index.md", "src/stdlib/profile.md") + symlink_q("../../../stdlib/Base64/docs/src/index.md", "src/stdlib/base64.md") end const PAGES = [ @@ -101,6 +103,7 @@ const PAGES = [ "stdlib/profile.md", "stdlib/stacktraces.md", "stdlib/simd-types.md", + "stdlib/base64.md", ], "Developer Documentation" => [ "devdocs/reflection.md", @@ -135,11 +138,11 @@ const PAGES = [ 
], ] -using DelimitedFiles, Test, Mmap, SharedArrays, Profile +using DelimitedFiles, Test, Mmap, SharedArrays, Profile, Base64 makedocs( build = joinpath(pwd(), "_build/html/en"), - modules = [Base, Core, BuildSysImg, DelimitedFiles, Test, Mmap, SharedArrays, Profile], + modules = [Base, Core, BuildSysImg, DelimitedFiles, Test, Mmap, SharedArrays, Profile, Base64], clean = false, doctest = "doctest" in ARGS, linkcheck = "linkcheck" in ARGS, diff --git a/stdlib/Base64/docs/src/index.md b/stdlib/Base64/docs/src/index.md new file mode 100644 index 0000000000000..4b4eb6e8cf571 --- /dev/null +++ b/stdlib/Base64/docs/src/index.md @@ -0,0 +1,8 @@ +# Base64 + +```@docs +Base64.Base64EncodePipe +Base64.base64encode +Base64.Base64DecodePipe +Base64.base64decode +``` diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl new file mode 100644 index 0000000000000..b065011ae36b0 --- /dev/null +++ b/stdlib/Base64/src/Base64.jl @@ -0,0 +1,26 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module Base64 + +#= +export + Base64EncodePipe, + base64encode, + Base64DecodePipe, + base64decode +=# + +# Base64EncodePipe is a pipe-like IO object, which converts into base64 data +# sent to a stream. (You must close the pipe to complete the encode, separate +# from closing the target stream). We also have a function base64encode(f, +# args...) which works like sprint except that it produces base64-encoded data, +# along with base64encode(args...) which is equivalent to base64encode(write, +# args...), to return base64 strings. 
A Base64DecodePipe object can be used to
+# decode base64-encoded data read from a stream, while function base64decode is
+# useful for decoding strings.
+
+include("buffer.jl")
+include("encode.jl")
+include("decode.jl")
+
+end
diff --git a/stdlib/Base64/src/buffer.jl b/stdlib/Base64/src/buffer.jl
new file mode 100644
index 0000000000000..5e1fff8756f2c
--- /dev/null
+++ b/stdlib/Base64/src/buffer.jl
@@ -0,0 +1,38 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Data buffer for pipes: `size` valid bytes starting at `ptr`, stored inside `data`.
+mutable struct Buffer
+    data::Vector{UInt8}  # backing storage
+    ptr::Ptr{UInt8}      # address of the first valid byte (points into `data`)
+    size::Int            # number of valid bytes
+
+    function Buffer(bufsize)
+        data = Vector{UInt8}(bufsize)
+        return new(data, pointer(data), 0)
+    end
+end
+
+Base.empty!(buffer::Buffer) = buffer.size = 0
+Base.getindex(buffer::Buffer, i::Integer) = unsafe_load(buffer.ptr, i)
+Base.setindex!(buffer::Buffer, v::UInt8, i::Integer) = unsafe_store!(buffer.ptr, v, i)
+Base.endof(buffer::Buffer) = buffer.size
+Base.pointer(buffer::Buffer) = buffer.ptr
+capacity(buffer::Buffer) = Int(pointer(buffer.data, endof(buffer.data) + 1) - buffer.ptr)
+
+function consumed!(buffer::Buffer, n::Integer)  # drop the first `n` valid bytes
+    @assert n ≤ buffer.size
+    buffer.ptr += n
+    buffer.size -= n
+end
+
+function read_to_buffer(io::IO, buffer::Buffer)  # compact, then refill from `io`
+    offset = buffer.ptr - pointer(buffer.data)  # 0-based byte offset of the valid data
+    copy!(buffer.data, 1, buffer.data, offset + 1, buffer.size)  # source index is 1-based
+    buffer.ptr = pointer(buffer.data)  # the valid bytes now start at the front
+    if !eof(io)
+        n = min(nb_available(io), capacity(buffer) - buffer.size)
+        unsafe_read(io, buffer.ptr + buffer.size, n)  # append after the valid bytes
+        buffer.size += n
+    end
+    return
+end
diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl
new file mode 100644
index 0000000000000..ec1ae553a2682
--- /dev/null
+++ b/stdlib/Base64/src/decode.jl
@@ -0,0 +1,217 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Generate decode table.
+const BASE64_CODE_END = 0x40  # produced by the decoder when the input is exhausted
+const BASE64_CODE_PAD = 0x41  # table entry of the padding character
+const BASE64_CODE_IGN = 0x42  # table entry of every byte outside the alphabet
+const BASE64_DECODE = fill(BASE64_CODE_IGN, 256)
+for code in 0:length(BASE64_ENCODE)-1
+    BASE64_DECODE[Int(BASE64_ENCODE[code+1])+1] = UInt8(code)
+end
+BASE64_DECODE[Int(encodepadding())+1] = BASE64_CODE_PAD
+decode(x::UInt8) = @inbounds return BASE64_DECODE[x + 1]
+
+"""
+    Base64DecodePipe(istream)
+
+Wrap `istream` in a new read-only I/O stream whose bytes are obtained by decoding
+the base64-encoded data read from `istream`.
+
+# Examples
+```jldoctest
+julia> io = IOBuffer();
+
+julia> iob64_decode = Base64DecodePipe(io);
+
+julia> write(io, "SGVsbG8h")
+8
+
+julia> seekstart(io);
+
+julia> String(read(iob64_decode))
+"Hello!"
+```
+"""
+struct Base64DecodePipe <: IO
+    io::IO               # source of the base64-encoded bytes
+    buffer::Buffer       # encoded input read ahead from `io`
+    rest::Vector{UInt8}  # decoded bytes not yet handed to a reader
+
+    # Start with a 512-byte input buffer and no pending decoded bytes.
+    function Base64DecodePipe(io::IO)
+        return new(io, Buffer(512), UInt8[])
+    end
+end
+
+# Store exactly `n` decoded bytes at `ptr`; throw `EOFError` when the stream ends
+# before that many bytes could be produced.
+function Base.unsafe_read(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt)
+    stop = read_until_end(pipe, ptr, n)
+    stop < ptr + n && throw(EOFError())
+    return nothing
+end
+
+# Read and decode as much data as possible.
+function read_until_end(pipe::Base64DecodePipe, ptr::Ptr{UInt8}, n::UInt)
+    p = ptr
+    p_end = ptr + n
+    # Hand out bytes decoded by an earlier call but not yet delivered.
+    while !isempty(pipe.rest) && p < p_end
+        unsafe_store!(p, shift!(pipe.rest))
+        p += 1
+    end
+
+    buffer = pipe.buffer
+    i = 0  # number of buffered input bytes consumed so far
+    b1 = b2 = b3 = b4 = BASE64_CODE_IGN
+    while true
+        if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40 && p + 2 < p_end
+            # fast path to decode
+            unsafe_store!(p    , b1 << 2 | b2 >> 4)
+            unsafe_store!(p + 1, b2 << 4 | b3 >> 2)
+            unsafe_store!(p + 2, b3 << 6 | b4     )
+            p += 3
+        else
+            i, p, ended = decode_slow(b1, b2, b3, b4, buffer, i, pipe.io, p, p_end - p, pipe.rest)
+            ended && break  # input exhausted
+        end
+        if p < p_end
+            if i + 4 ≤ endof(buffer)
+                b1 = decode(buffer[i+1])
+                b2 = decode(buffer[i+2])
+                b3 = decode(buffer[i+3])
+                b4 = decode(buffer[i+4])
+                i += 4
+            else
+                consumed!(buffer, i)
+                read_to_buffer(pipe.io, buffer)
+                i = 0
+                b1 = b2 = b3 = b4 = BASE64_CODE_IGN  # decode_slow loads the next codes
+            end
+        else
+            break
+        end
+    end
+    consumed!(buffer, i)
+
+    return p
+end
+
+# Read a single decoded byte. The zero-length `unsafe_read` has no room for output,
+# so whatever it decodes is queued in `pipe.rest`.
+function Base.read(pipe::Base64DecodePipe, ::Type{UInt8})
+    if isempty(pipe.rest)
+        unsafe_read(pipe, convert(Ptr{UInt8}, C_NULL), 0)
+        isempty(pipe.rest) && throw(EOFError())
+    end
+    return shift!(pipe.rest)
+end
+
+# Read up to `nb` decoded bytes into `data`, growing it on demand. `data` is resized
+# to the number of bytes read, which is also the return value.
+function Base.readbytes!(pipe::Base64DecodePipe, data::AbstractVector{UInt8}, nb::Integer=length(data))
+    filled::Int = 0
+    while filled < nb && !eof(pipe)
+        # `max(…, 1)`: doubling an empty `data` would never make room.
+        length(data) == filled && resize!(data, min(max(length(data) * 2, 1), nb))
+        p = pointer(data, filled + 1)
+        p_end = read_until_end(pipe, p, UInt(min(length(data), nb) - filled))
+        filled += p_end - p
+    end
+    resize!(data, filled)
+    return filled
+end
+
+# Encoded bytes still held in `pipe.buffer` are pending input, just like `pipe.io`.
+Base.eof(pipe::Base64DecodePipe) = isempty(pipe.rest) && pipe.buffer.size == 0 && eof(pipe.io)
+Base.close(pipe::Base64DecodePipe) = nothing
+
+# Decode data from (b1, b2, b3, b4, buffer, input) into (ptr, rest).
+function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest)
+    # Skip ignore code.
+    while true
+        if b1 == BASE64_CODE_IGN
+            b1, b2, b3 = b2, b3, b4
+        elseif b2 == BASE64_CODE_IGN
+            b2, b3 = b3, b4
+        elseif b3 == BASE64_CODE_IGN
+            b3 = b4
+        elseif b4 == BASE64_CODE_IGN
+            # pass
+        else
+            break
+        end
+        if i + 1 ≤ endof(buffer)
+            b4 = decode(buffer[i+=1])
+        elseif !eof(input)
+            b4 = decode(read(input, UInt8))
+        else
+            b4 = BASE64_CODE_END
+            break
+        end
+    end
+
+    # Check the decoded quadruplet.
+    k = 0  # number of bytes this quadruplet decodes to
+    if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40
+        k = 3
+    elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD
+        b4 = 0x00
+        k = 2
+    elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD
+        b3 = b4 = 0x00
+        k = 1
+    elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END
+        b1 = b2 = b3 = b4 = 0x00  # clean end of input: nothing left to decode
+    else
+        throw(ArgumentError("malformed base64 sequence"))
+    end
+
+    # Write output.
+    p::Ptr{UInt8} = ptr
+    p_end = ptr + n
+    function output(b)
+        if p < p_end
+            unsafe_store!(p, b)
+            p += 1
+        else
+            push!(rest, b)  # no room at `ptr`: keep the byte for the next read
+        end
+    end
+    k ≥ 1 && output(b1 << 2 | b2 >> 4)
+    k ≥ 2 && output(b2 << 4 | b3 >> 2)
+    k ≥ 3 && output(b3 << 6 | b4     )
+
+    return i, p, k == 0  # third value is `true` once the input is exhausted
+end
+
+"""
+    base64decode(string)
+
+Decode the base64-encoded `string` and return the decoded bytes as a `Vector{UInt8}`.
+
+See also [`base64encode`](@ref).
+
+# Examples
+```jldoctest
+julia> b = base64decode("SGVsbG8h")
+6-element Array{UInt8,1}:
+ 0x48
+ 0x65
+ 0x6c
+ 0x6c
+ 0x6f
+ 0x21
+
+julia> String(b)
+"Hello!"
+```
+"""
+function base64decode(s)
+    b = IOBuffer(s)
+    try
+        return read(Base64DecodePipe(b))
+    finally
+        close(b)
+    end
+end
diff --git a/stdlib/Base64/src/encode.jl b/stdlib/Base64/src/encode.jl
new file mode 100644
index 0000000000000..37dda446cd525
--- /dev/null
+++ b/stdlib/Base64/src/encode.jl
@@ -0,0 +1,205 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Generate encode table.
+const BASE64_ENCODE = [UInt8(x) for x in ['A':'Z'; 'a':'z'; '0':'9'; '+'; '/']]
+encode(x::UInt8) = @inbounds return BASE64_ENCODE[(x & 0x3f) + 1]  # low 6 bits only
+encodepadding() = UInt8('=')
+
+"""
+    Base64EncodePipe(ostream)
+
+Return a new write-only I/O stream, which converts any bytes written to it into
+base64-encoded ASCII bytes written to `ostream`.  Calling [`close`](@ref) on the
+`Base64EncodePipe` stream is necessary to complete the encoding (but does not
+close `ostream`).
+
+# Examples
+```jldoctest
+julia> io = IOBuffer();
+
+julia> iob64_encode = Base64EncodePipe(io);
+
+julia> write(iob64_encode, "Hello!")
+6
+
+julia> close(iob64_encode);
+
+julia> str = String(take!(io))
+"SGVsbG8h"
+
+julia> String(base64decode(str))
+"Hello!"
+```
+"""
+struct Base64EncodePipe <: IO
+    io::IO          # destination of the encoded bytes
+    buffer::Buffer  # bytes awaiting a full triplet; also scratch space for encoded output
+
+    function Base64EncodePipe(io::IO)
+        # The buffer size must be at least 3.
+        buffer = Buffer(512)
+        pipe = new(io, buffer)
+        finalizer(buffer, _ -> close(pipe))  # `close` also runs when `buffer` is finalized
+        return pipe
+    end
+end
+
+# Encode the `n` bytes at `ptr`, preceded by any bytes left over from earlier writes.
+# Trailing bytes that do not complete a triplet stay in the buffer for the next call.
+function Base.unsafe_write(pipe::Base64EncodePipe, ptr::Ptr{UInt8}, n::UInt)::Int
+    buffer = pipe.buffer
+    m = buffer.size  # leftover bytes from earlier writes
+    b1, b2, b3, k = loadtriplet!(buffer, ptr, n)
+    @assert k ≥ m
+    p = ptr + k - m  # first input byte not yet loaded
+    if k < 3
+        if k == 1
+            buffer[1] = b1
+            buffer.size = 1
+        elseif k == 2
+            buffer[1] = b1
+            buffer[2] = b2
+            buffer.size = 2
+        end
+        return p - ptr
+    end
+    @assert buffer.size == 0
+
+    i = 0  # encoded bytes staged in the buffer
+    p_end = ptr + n
+    while true
+        buffer[i+1] = encode(b1 >> 2          )
+        buffer[i+2] = encode(b1 << 4 | b2 >> 4)
+        buffer[i+3] = encode(b2 << 2 | b3 >> 6)
+        buffer[i+4] = encode(          b3     )
+        i += 4
+        if p + 2 < p_end
+            b1 = unsafe_load(p, 1)
+            b2 = unsafe_load(p, 2)
+            b3 = unsafe_load(p, 3)
+            p += 3
+        else
+            break
+        end
+        if i + 4 > capacity(buffer)
+            unsafe_write(pipe.io, pointer(buffer), i)
+            i = 0
+        end
+    end
+    i > 0 && unsafe_write(pipe.io, pointer(buffer), i)
+
+    while p < p_end
+        buffer[buffer.size+=1] = unsafe_load(p)
+        p += 1
+    end
+    return p - ptr
+end
+
+# Queue one byte; once three bytes are buffered, encode them via `unsafe_write`.
+function Base.write(pipe::Base64EncodePipe, x::UInt8)
+    buffer = pipe.buffer
+    buffer[buffer.size+=1] = x
+    buffer.size == 3 && unsafe_write(pipe, C_NULL, 0)
+    return 1
+end
+
+# Encode the buffered leftover bytes (if any), adding `=` padding; `pipe.io` stays open.
+function Base.close(pipe::Base64EncodePipe)
+    b1, b2, b3, k = loadtriplet!(pipe.buffer, Ptr{UInt8}(C_NULL), UInt(0))
+    if k == 0
+        # no leftover and padding
+    elseif k == 1
+        write(pipe.io,
+              encode(b1 >> 2),
+              encode(b1 << 4),
+              encodepadding(),
+              encodepadding())
+    elseif k == 2
+        write(pipe.io,
+              encode(          b1 >> 2),
+              encode(b1 << 4 | b2 >> 4),
+              encode(b2 << 2          ),
+              encodepadding())
+    else
+        @assert k == 3
+        write(pipe.io,
+              encode(b1 >> 2          ),
+              encode(b1 << 4 | b2 >> 4),
+              encode(b2 << 2 | b3 >> 6),
+              encode(          b3     ))
+    end
+    return nothing
+end
+
+# Load up to three bytes, first from buffer and then from ptr; `k` is the number loaded.
+function loadtriplet!(buffer::Buffer, ptr::Ptr{UInt8}, n::UInt)
+    b1 = b2 = b3 = 0x00
+    if buffer.size == 0
+        if n == 0
+            k = 0
+        elseif n == 1
+            b1 = unsafe_load(ptr, 1)
+            k = 1
+        elseif n == 2
+            b1 = unsafe_load(ptr, 1)
+            b2 = unsafe_load(ptr, 2)
+            k = 2
+        else
+            b1 = unsafe_load(ptr, 1)
+            b2 = unsafe_load(ptr, 2)
+            b3 = unsafe_load(ptr, 3)
+            k = 3
+        end
+    elseif buffer.size == 1
+        b1 = buffer[1]
+        if n == 0
+            k = 1
+        elseif n == 1
+            b2 = unsafe_load(ptr, 1)
+            k = 2
+        else
+            b2 = unsafe_load(ptr, 1)
+            b3 = unsafe_load(ptr, 2)
+            k = 3
+        end
+    elseif buffer.size == 2
+        b1 = buffer[1]
+        b2 = buffer[2]
+        if n == 0
+            k = 2
+        else
+            b3 = unsafe_load(ptr, 1)
+            k = 3
+        end
+    else
+        @assert buffer.size == 3
+        b1 = buffer[1]
+        b2 = buffer[2]
+        b3 = buffer[3]
+        k = 3
+    end
+    empty!(buffer)  # the buffered bytes now live in b1..b3
+    return b1, b2, b3, k
+end
+
+"""
+    base64encode(writefunc, args...)
+    base64encode(args...)
+
+Given a [`write`](@ref)-like function `writefunc`, which takes an I/O stream as
+its first argument, `base64encode(writefunc, args...)` calls `writefunc` to
+write `args...` to a base64-encoded string, and returns the string.
+`base64encode(args...)` is equivalent to `base64encode(write, args...)`: it
+converts its arguments into bytes using the standard [`write`](@ref) functions
+and returns the base64-encoded string.
+
+See also [`base64decode`](@ref).
+"""
+function base64encode(f::Function, args...)
+    s = IOBuffer()
+    b = Base64EncodePipe(s)
+    f(b, args...)
+    close(b)  # flush the last partial triplet and its padding
+    return String(take!(s))
+end
+base64encode(args...) = base64encode(write, args...)
diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl
new file mode 100644
index 0000000000000..8eab331ee6b1b
--- /dev/null
+++ b/stdlib/Base64/test/runtests.jl
@@ -0,0 +1,77 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+import Base64:
+    Base64EncodePipe,
+    base64encode,
+    Base64DecodePipe,
+    base64decode
+
+const inputText = "Man is distinguished, not only by his reason, but by this singular passion from other animals, which is a lust of the mind, that by a perseverance of delight in the continued and indefatigable generation of knowledge, exceeds the short vehemence of any carnal pleasure."
+const encodedMaxLine76 = """
+TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0aGlz
+IHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1c3Qgb2Yg
+dGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0aGUgY29udGlu
+dWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdlLCBleGNlZWRzIHRo
+ZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="""
+
+@testset "Examples" begin
+    # Encode and decode
+    fname = tempname()
+    open(fname, "w") do f
+        opipe = Base64EncodePipe(f)
+        write(opipe, inputText)
+        @test close(opipe) === nothing
+    end
+
+    open(fname, "r") do f
+        ipipe = Base64DecodePipe(f)
+        @test read(ipipe, String) == inputText
+        @test close(ipipe) === nothing
+    end
+    rm(fname)
+
+    # Byte-by-byte encode and decode.
+    buf = IOBuffer()
+    pipe = Base64EncodePipe(buf)
+    for char in inputText
+        write(pipe, UInt8(char))
+    end
+    close(pipe)
+    pipe = Base64DecodePipe(IOBuffer(take!(buf)))
+    decoded = UInt8[]
+    while !eof(pipe)
+        push!(decoded, read(pipe, UInt8))
+    end
+    @test String(decoded) == inputText
+
+    # Encode to string and decode
+    @test String(base64decode(base64encode(inputText))) == inputText
+
+    # Decode with max line chars = 76 and padding
+    ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76))
+    @test read(ipipe, String) == inputText
+
+    # Decode with max line chars = 76 and no padding
+    #ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-1]))
+    #@test read(ipipe, String) == inputText
+
+    # Decode with two padding characters ("==")
+    ipipe = Base64DecodePipe(IOBuffer(string(encodedMaxLine76[1:end-2],"==")))
+    @test read(ipipe, String) == inputText[1:end-1]
+
+    # Test incorrect format
+    ipipe = Base64DecodePipe(IOBuffer(encodedMaxLine76[1:end-3]))
+    @test_throws ArgumentError read(ipipe, String)
+
+    # issue #21314
+    @test base64decode(chomp("test")) == base64decode("test")
+end
+
+@testset "Random data" begin
+    mt = MersenneTwister(1234)
+    for _ in 1:1000
+        data = rand(mt, UInt8, rand(mt, 0:300))  # lengths come from the seeded RNG too
+        @test hash(base64decode(base64encode(data))) == hash(data)
+    end
+end