From 5dad58045e3f9a703b36c00f958a9df449717016 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Fri, 15 Sep 2017 14:38:22 -0400 Subject: [PATCH] add header to serialization format. fixes #22888 --- base/serialize.jl | 62 ++++++++++++++++++++++++++++++++---- doc/src/stdlib/io-network.md | 1 + test/serialize.jl | 18 +++++++++-- 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/base/serialize.jl b/base/serialize.jl index b78b658430fe5..4dea2c3e818bc 100644 --- a/base/serialize.jl +++ b/base/serialize.jl @@ -24,7 +24,8 @@ SerializationState(io::IO) = SerializationState{typeof(io)}(io) # types AbstractSerializer and Serializer # defined in dict.jl const n_int_literals = 33 -const n_reserved_slots = 12 +const n_reserved_slots = 25 +const n_reserved_tags = 12 const TAGS = Any[ Symbol, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, @@ -48,6 +49,8 @@ const TAGS = Any[ Symbol, # OBJECT_TAG Symbol, # REF_OBJECT_TAG Symbol, # FULL_GLOBALREF_TAG + Symbol, # HEADER_TAG + fill(Symbol, n_reserved_tags)..., (), Bool, Any, Bottom, Core.TypeofBottom, Type, svec(), Tuple{}, false, true, nothing, :Any, :Array, :TypeVar, :Box, :Tuple, :Ptr, :return, :call, Symbol("::"), :Function, @@ -64,9 +67,9 @@ const TAGS = Any[ (Int64(0):Int64(n_int_literals-1))... ] -@assert length(TAGS) <= 255 +@assert length(TAGS) == 255 -const ser_version = 6 # do not make changes without bumping the version #! +const ser_version = 7 # do not make changes without bumping the version #! const NTAGS = length(TAGS) @@ -120,6 +123,7 @@ const WRAPPER_DATATYPE_TAG = Int32(o0+11) const OBJECT_TAG = Int32(o0+12) const REF_OBJECT_TAG = Int32(o0+13) const FULL_GLOBALREF_TAG = Int32(o0+14) +const HEADER_TAG = Int32(o0+15) writetag(s::IO, tag) = write(s, UInt8(tag)) @@ -638,15 +642,54 @@ function serialize_any(s::AbstractSerializer, @nospecialize(x)) end """ - serialize(stream, value) + Serializer.writeheader(s::AbstractSerializer) + +Write an identifying header to the specified serializer. The header consists of +8 bytes as follows: + +| Offset | Description | +|:-------|:------------------------------------------------| +| 0 | tag byte (0x37) | +| 1-2 | signature bytes "JL" | +| 3 | protocol version | +| 4 | bits 0-1: endianness: 0 = little, 1 = big | +| 4 | bits 2-3: platform: 0 = 32-bit, 1 = 64-bit | +| 5-7 | reserved | +""" +function writeheader(s::AbstractSerializer) + io = s.io + writetag(io, HEADER_TAG) + write(io, "JL") # magic bytes + write(io, UInt8(ser_version)) + endianness = (ENDIAN_BOM == 0x04030201 ? 0 : + ENDIAN_BOM == 0x01020304 ? 1 : + error("unsupported endianness in serializer")) + machine = (sizeof(Int) == 4 ? 0 : + sizeof(Int) == 8 ? 1 : + error("unsupported word size in serializer")) + write(io, UInt8(endianness) | (UInt8(machine) << 2)) + write(io, b"\x00\x00\x00") # 3 reserved bytes + nothing +end + +""" + serialize(stream::IO, value) Write an arbitrary value to a stream in an opaque format, such that it can be read back by -[`deserialize`](@ref). The read-back value will be as identical as possible to the original. In -general, this process will not work if the reading and writing are done by different +[`deserialize`](@ref). The read-back value will be as identical as possible to the original. +In general, this process will not work if the reading and writing are done by different versions of Julia, or an instance of Julia with a different system image. `Ptr` values are serialized as all-zero bit patterns (`NULL`). + +An 8-byte identifying header is written to the stream first. To avoid writing the header, +construct a `SerializationState` and use it as the first argument to `serialize` instead. +See also [`Serializer.writeheader`](@ref). """ -serialize(s::IO, x) = serialize(SerializationState(s), x) +function serialize(s::IO, x) + ss = SerializationState(s) + writeheader(ss) + serialize(ss, x) +end ## deserializing values ## @@ -745,6 +788,11 @@ function handle_deserialize(s::AbstractSerializer, b::Int32) return s.table[Int(id)] elseif b == LONGSYMBOL_TAG return deserialize_symbol(s, Int(read(s.io, Int32)::Int32)) + elseif b == HEADER_TAG + for _ = 1:7 + read(s.io, UInt8) + end + return deserialize(s) end t = desertag(b) if t.mutable && length(t.types) > 0 # manual specialization of fieldcount diff --git a/doc/src/stdlib/io-network.md b/doc/src/stdlib/io-network.md index 22dc207996886..d7b6a1973ea63 100644 --- a/doc/src/stdlib/io-network.md +++ b/doc/src/stdlib/io-network.md @@ -35,6 +35,7 @@ Base.isreadable Base.isopen Base.Serializer.serialize Base.Serializer.deserialize +Base.Serializer.writeheader Base.Grisu.print_shortest Base.fd Base.redirect_stdout diff --git a/test/serialize.jl b/test/serialize.jl index f071ddd4fdf26..0be4923142896 100644 --- a/test/serialize.jl +++ b/test/serialize.jl @@ -4,8 +4,8 @@ using Base.Test # Check that serializer hasn't gone out-of-frame @test Serializer.sertag(Symbol) == 1 -@test Serializer.sertag(()) == 55 -@test Serializer.sertag(false) == 63 +@test Serializer.sertag(()) == 68 +@test Serializer.sertag(false) == 76 function create_serialization_stream(f::Function) s = IOBuffer() @@ -480,3 +480,17 @@ let x = T20324[T20324(1) for i = 1:2] @test isa(y,Vector{T20324}) @test y == x end + +# serializer header +let io = IOBuffer() + serialize(io, ()) + seekstart(io) + b = read(io) + @test b[1] == Serializer.HEADER_TAG + @test b[2:3] == b"JL" + @test b[4] == Serializer.ser_version + @test (b[5] & 0x3) == (ENDIAN_BOM == 0x01020304) + @test ((b[5] & 0xc)>>2) == (sizeof(Int) == 8) + @test (b[5] & 0xf0) == 0 + @test all(b[6:8] .== 0) +end