-
Notifications
You must be signed in to change notification settings - Fork 5
/
serialization.jl
371 lines (290 loc) · 13.9 KB
/
serialization.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
#####
##### LPCM API types/functions/stubs
#####
# Ordered collection of the `create_constructor` callbacks added through
# `register_lpcm_format!`. Entries are arbitrary callables, hence the `Any` eltype.
const LPCM_FORMAT_REGISTRY = Vector{Any}()

"""
    Onda.register_lpcm_format!(create_constructor)

Register an `AbstractLPCMFormat` constructor so that it is picked up automatically
whenever [`format`](@ref) is called. Authors of new `AbstractLPCMFormat` subtypes
should call this function for their subtype.

`create_constructor` should be a unary function that takes a single
`file_format::AbstractString` argument and returns either a matching
`AbstractLPCMFormat` constructor or `nothing`. Any returned constructor `f` should be
of the form `f(info; kwargs...)::AbstractLPCMFormat`, where `info` is a
[`SamplesInfoV2`](@ref)-compliant value.

The updated registry is returned.

Note that if `Onda.register_lpcm_format!` is called in a downstream package, it must
be called within the `__init__` function of the package's top-level module to ensure
that the function is always invoked when the module is loaded (not just during
precompilation). For details, see
https://docs.julialang.org/en/v1/manual/modules/#Module-initialization-and-precompilation.
"""
function register_lpcm_format!(create_constructor)
    return push!(LPCM_FORMAT_REGISTRY, create_constructor)
end
"""
format(file_format::AbstractString, info; kwargs...)
Return `f(info; kwargs...)` where `f` constructs the `AbstractLPCMFormat` instance that
corresponds to `file_format` and info is a [`SamplesInfoV2`](@ref)-compliant value. `f` is
determined by matching `file_format` to a suitable format constuctor registered via
[`register_lpcm_format!`](@ref).
See also: [`deserialize_lpcm`](@ref), [`serialize_lpcm`](@ref)
"""
function format(file_format::AbstractString, info; kwargs...)
for create_constructor in LPCM_FORMAT_REGISTRY
f = create_constructor(file_format)
f === nothing && continue
return f(info; kwargs...)
end
throw(ArgumentError("unrecognized file_format: \"$file_format\""))
end
"""
AbstractLPCMFormat
A type whose subtypes represents byte/stream formats that can be (de)serialized
to/from Onda's standard interleaved LPCM representation.
All subtypes of the form `F<:AbstractLPCMFormat` must call [`Onda.register_lpcm_format!`](@ref)
and define an appropriate [`file_format_string`](@ref) method.
See also:
- [`format`](@ref)
- [`deserialize_lpcm`](@ref)
- [`deserialize_lpcm_callback`](@ref)
- [`serialize_lpcm`](@ref)
- [`LPCMFormat`](@ref)
- [`LPCMZstFormat`](@ref)
- [`AbstractLPCMStream`](@ref)
"""
abstract type AbstractLPCMFormat end
"""
AbstractLPCMStream
A type that represents an LPCM (de)serialization stream.
See also:
- [`deserializing_lpcm_stream`](@ref)
- [`serializing_lpcm_stream`](@ref)
- [`finalize_lpcm_stream`](@ref)
"""
abstract type AbstractLPCMStream end
"""
deserialize_lpcm_callback(format::AbstractLPCMFormat, samples_offset, samples_count)
Return `(callback, required_byte_offset, required_byte_count)` where `callback` accepts the
byte block specified by `required_byte_offset` and `required_byte_count` and returns the
samples specified by `samples_offset` and `samples_count`.
As a fallback, this function returns `(callback, missing, missing)`, where `callback`
requires all available bytes. `AbstractLPCMFormat` subtypes that support partial/block-based
deserialization (e.g. the basic `LPCMFormat`) can overload this function to only request
exactly the byte range that is required for the sample range requested by the caller.
This allows callers to handle the byte block retrieval themselves while keeping
Onda's LPCM Serialization API agnostic to the caller's storage layer of choice.
"""
function deserialize_lpcm_callback(format::AbstractLPCMFormat, samples_offset, samples_count)
callback = bytes -> deserialize_lpcm(format, bytes, samples_offset, samples_count)
return callback, missing, missing
end
"""
deserializing_lpcm_stream(format::AbstractLPCMFormat, io)
Return a `stream::AbstractLPCMStream` that wraps `io` to enable direct LPCM
deserialization from `io` via [`deserialize_lpcm`](@ref).
Note that `stream` must be finalized after usage via [`finalize_lpcm_stream`](@ref).
Until `stream` is finalized, `io` should be considered to be part of the internal
state of `stream` and should not be directly interacted with by other processes.
"""
function deserializing_lpcm_stream end
"""
serializing_lpcm_stream(format::AbstractLPCMFormat, io)
Return a `stream::AbstractLPCMStream` that wraps `io` to enable direct LPCM
serialization to `io` via [`serialize_lpcm`](@ref).
Note that `stream` must be finalized after usage via [`finalize_lpcm_stream`](@ref).
Until `stream` is finalized, `io` should be considered to be part of the internal
state of `stream` and should not be directly interacted with by other processes.
"""
function serializing_lpcm_stream end
"""
finalize_lpcm_stream(stream::AbstractLPCMStream)::Bool
Finalize `stream`, returning `true` if the underlying I/O object used to construct
`stream` is still open and usable. Otherwise, return `false` to indicate that
underlying I/O object was closed as result of finalization.
"""
function finalize_lpcm_stream end
"""
deserialize_lpcm(format::AbstractLPCMFormat, bytes,
samples_offset::Integer=0,
samples_count::Integer=typemax(Int))
deserialize_lpcm(stream::AbstractLPCMStream,
samples_offset::Integer=0,
samples_count::Integer=typemax(Int))
Return a channels-by-timesteps `AbstractMatrix` of interleaved LPCM-encoded
sample data by deserializing the provided `bytes` in the given `format`, or
from the given `stream` constructed by [`deserializing_lpcm_stream`](@ref).
Note that this operation may be performed in a zero-copy manner such that the
returned sample matrix directly aliases `bytes`.
The returned segment is at most `sample_offset` samples offset from the start of
`stream`/`bytes` and contains at most `sample_count` samples. This ensures that
overrun behavior is generally similar to the behavior of `Base.skip(io, n)` and
`Base.read(io, n)`.
This function is the inverse of the corresponding [`serialize_lpcm`](@ref) method, i.e.:
```
serialize_lpcm(format, deserialize_lpcm(format, bytes)) == bytes
```
"""
function deserialize_lpcm end
"""
serialize_lpcm(format::AbstractLPCMFormat, samples::AbstractMatrix)
serialize_lpcm(stream::AbstractLPCMStream, samples::AbstractMatrix)
Return the `AbstractVector{UInt8}` of bytes that results from serializing `samples`
to the given `format` (or serialize those bytes directly to `stream`) where `samples`
is a channels-by-timesteps matrix of interleaved LPCM-encoded sample data.
Note that this operation may be performed in a zero-copy manner such that the
returned `AbstractVector{UInt8}` directly aliases `samples`.
This function is the inverse of the corresponding [`deserialize_lpcm`](@ref)
method, i.e.:
```
deserialize_lpcm(format, serialize_lpcm(format, samples)) == samples
```
"""
function serialize_lpcm end
"""
file_format_string(format::AbstractLPCMFormat)
Return the `String` representation of `format` to be written to the `file_format` field of a `*.signals` file.
"""
function file_format_string end
#####
##### read_lpcm/write_lpcm
#####
# Read everything stored at `path` and decode it with `format`.
function read_lpcm(path, format::AbstractLPCMFormat)
    return deserialize_lpcm(format, read(path))
end
# Read only the part of `path` that `format` reports as necessary for the requested
# sample range, then decode that byte block with the callback supplied by `format`.
function read_lpcm(path, format::AbstractLPCMFormat, sample_offset, sample_count)
    plan = deserialize_lpcm_callback(format, sample_offset, sample_count)
    decode, byte_offset, byte_count = plan
    return decode(read_byte_range(path, byte_offset, byte_count))
end
# Encode `data` with `format` and hand the resulting bytes to `write_full_path`.
function write_lpcm(path, format::AbstractLPCMFormat, data)
    encoded = serialize_lpcm(format, data)
    return write_full_path(path, encoded)
end
#####
##### `LPCMFormat`
#####
"""
LPCMFormat(channel_count::Int, sample_type::Type)
LPCMFormat(info::SamplesInfoV2)
Return a `LPCMFormat<:AbstractLPCMFormat` instance corresponding to Onda's default
interleaved LPCM format assumed for sample data files with the "lpcm"
extension.
`channel_count` corresponds to `length(info.channels)`, while `sample_type`
corresponds to `sample_type(info)`
Note that bytes (de)serialized to/from this format are little-endian (per the
Onda specification).
"""
struct LPCMFormat{S<:LPCM_SAMPLE_TYPE_UNION} <: AbstractLPCMFormat
channel_count::Int
sample_type::Type{S}
end
# Derive the format from `info`: one channel per entry of `info.channels`, with the
# sample type given by `sample_type(info)`.
function LPCMFormat(info)
    channel_count = length(info.channels)
    return LPCMFormat(channel_count, sample_type(info))
end
# Offer `LPCMFormat` as the constructor for the "lpcm" file format.
register_lpcm_format!() do file_format
    return file_format == "lpcm" ? LPCMFormat : nothing
end
function file_format_string(::LPCMFormat)
    return "lpcm"
end
# Check that `samples` has one row per channel of `format` and an element type that
# is a subtype of `S`. Throws an `ArgumentError` describing the first mismatch found
# (row count is checked before element type); returns `nothing` otherwise.
function _validate_lpcm_samples(format::LPCMFormat{S}, samples::AbstractMatrix) where {S}
    format.channel_count == size(samples, 1) || throw(ArgumentError("""
        `samples` row count ($(size(samples, 1))) does not
        match expected channel count ($(format.channel_count))
        """))
    eltype(samples) <: S || throw(ArgumentError("""
        `samples` eltype ($(eltype(samples))) does not
        match expected eltype ($S)
        """))
    return nothing
end
# Size in bytes of one timestep, i.e. one value of type `S` for every channel.
function _bytes_per_sample(format::LPCMFormat{S}) where {S}
    return sizeof(S) * format.channel_count
end
# Pairs an `LPCMFormat` with the I/O object that samples are read from or written to.
struct LPCMStream{S<:LPCM_SAMPLE_TYPE_UNION,I} <: AbstractLPCMStream
    # Describes how the bytes passing through `io` are laid out.
    format::LPCMFormat{S}
    # Wrapped I/O object; its concrete type is captured by the parameter `I`.
    io::I
end
function deserializing_lpcm_stream(format::LPCMFormat, io)
    return LPCMStream(format, io)
end
function serializing_lpcm_stream(format::LPCMFormat, io)
    return LPCMStream(format, io)
end
# Nothing is done to the wrapped I/O object here, so it is always reported as usable.
function finalize_lpcm_stream(::LPCMStream)
    return true
end
# Reinterpret `bytes` as interleaved values of type `S` and return a
# `channel_count`-by-timesteps matrix over the requested range without copying
# (the result is a reshaped view of the reinterpreted `bytes`).
#
# Arguments:
# - `sample_offset`: number of leading timesteps to skip.
# - `sample_count`: maximum number of timesteps to return.
#
# Throws an `ArgumentError` if `sample_offset` or `sample_count` is negative.
#
# Both quantities are clamped to the number of whole timesteps available *before*
# any multiplication by `channel_count`, so:
# - the default `sample_count = typemax(Int)` cannot overflow;
# - empty `bytes`, or an offset at/past the end, yields a `channel_count`-by-0 matrix
#   instead of an out-of-bounds view or a spurious trailing sample;
# - values beyond the last whole timestep (fewer than `channel_count` of them) are
#   not part of the result.
function deserialize_lpcm(format::LPCMFormat{S}, bytes, sample_offset::Integer=0,
                          sample_count::Integer=typemax(Int)) where {S}
    sample_offset >= 0 ||
        throw(ArgumentError("`sample_offset` must be non-negative, got $sample_offset"))
    sample_count >= 0 ||
        throw(ArgumentError("`sample_count` must be non-negative, got $sample_count"))
    channel_count = format.channel_count
    interleaved = reinterpret(S, bytes)
    # Integer division keeps this exact for any length (no floating point round trip).
    available_timesteps = div(length(interleaved), channel_count)
    first_timestep = Int(min(sample_offset, available_timesteps))
    timestep_count = Int(min(sample_count, available_timesteps - first_timestep))
    first_index = (channel_count * first_timestep) + 1
    last_index = channel_count * (first_timestep + timestep_count)
    # When `timestep_count == 0` this is the empty range `first_index:(first_index - 1)`.
    selected = view(interleaved, first_index:last_index)
    return reshape(selected, (channel_count, timestep_count))
end
# Uncompressed LPCM has a fixed number of bytes per timestep, so the requested sample
# range maps directly onto a byte range. The callback therefore decodes the block it
# is given in full, with no further offset or count applied.
function deserialize_lpcm_callback(format::LPCMFormat, samples_offset, samples_count)
    stride = _bytes_per_sample(format)
    decode_block = bytes -> deserialize_lpcm(format, bytes)
    return (decode_block, samples_offset * stride, samples_count * stride)
end
# Skip `sample_offset` timesteps of `stream.io` via `jump`, read up to `sample_count`
# timesteps worth of bytes, and decode them with `stream.format`.
#
# The number of bytes to read saturates at `typemax(Int)` ("read everything that is
# left"). The saturation test is done by division *before* multiplying, because a
# wrapped product is not necessarily negative (e.g. `4 * 2^62 == 0` for `Int64`) and
# would otherwise silently shorten the read. A negative `sample_count` keeps its
# previous meaning of "read everything that is left".
function deserialize_lpcm(stream::LPCMStream, sample_offset::Integer=0,
                          sample_count::Integer=typemax(Int))
    bytes_per_sample = _bytes_per_sample(stream.format)
    jump(stream.io, bytes_per_sample * sample_offset)
    # `max(..., 1)` only guards the division for a degenerate zero-width timestep.
    largest_safe_count = div(typemax(Int), max(bytes_per_sample, 1))
    byte_count = if sample_count < 0 || sample_count > largest_safe_count
        typemax(Int)
    else
        bytes_per_sample * sample_count
    end
    return deserialize_lpcm(stream.format, read(stream.io, byte_count))
end
# Serialize `samples` (channels-by-timesteps) to the bytes of `format`.
#
# Throws an `ArgumentError` (from `_validate_lpcm_samples`) if the row count or
# element type of `samples` does not match `format`.
#
# Returns an `AbstractVector{UInt8}`. For a `Matrix` the result is a reinterpreted
# view of `samples` itself (no copy); for any other `AbstractMatrix` the elements are
# written to a temporary buffer and a fresh `Vector{UInt8}` is returned.
function serialize_lpcm(format::LPCMFormat, samples::AbstractMatrix)
    _validate_lpcm_samples(format, samples)
    samples isa Matrix && return reinterpret(UInt8, vec(samples))
    io = IOBuffer()
    write(io, samples)
    # `take!` is the public way to obtain the written bytes; it avoids relying on the
    # undocumented `data`/`size` fields of `IOBuffer`.
    return take!(io)
end
# Validate `samples` against the stream's format, then write them straight to the
# wrapped I/O object. The value returned by `write` is passed through.
function serialize_lpcm(stream::LPCMStream, samples::AbstractMatrix)
    sink, expected_format = stream.io, stream.format
    _validate_lpcm_samples(expected_format, samples)
    return write(sink, samples)
end
#####
##### `LPCMZstFormat`
#####
"""
LPCMZstFormat(lpcm::LPCMFormat; level=3)
LPCMZstFormat(info; level=3)
Return a `LPCMZstFormat<:AbstractLPCMFormat` instance that corresponds to Onda's
default interleaved LPCM format compressed by `zstd`. This format is assumed
for sample data files with the "lpcm.zst" extension.
The `level` keyword argument sets the same compression level parameter as the
corresponding flag documented by the `zstd` command line utility.
See https://facebook.github.io/zstd/ for details about `zstd`.
"""
struct LPCMZstFormat{S} <: AbstractLPCMFormat
lpcm::LPCMFormat{S}
level::Int
LPCMZstFormat(lpcm::LPCMFormat{S}; level=3) where {S} = new{S}(lpcm, level)
end
# Build the underlying `LPCMFormat` from `info`, forwarding keyword arguments
# unchanged to the `LPCMZstFormat(::LPCMFormat; ...)` constructor.
function LPCMZstFormat(info; kwargs...)
    uncompressed = LPCMFormat(info)
    return LPCMZstFormat(uncompressed; kwargs...)
end
# Offer `LPCMZstFormat` as the constructor for the "lpcm.zst" file format.
register_lpcm_format!() do file_format
    return file_format == "lpcm.zst" ? LPCMZstFormat : nothing
end
function file_format_string(::LPCMZstFormat)
    return "lpcm.zst"
end
# Decompress `bytes` in full, then decode the result as plain LPCM. Any extra
# positional arguments are forwarded to the `LPCMFormat` method.
function deserialize_lpcm(format::LPCMZstFormat, bytes, args...)
    compressed = unsafe_vec_uint8(bytes)
    return deserialize_lpcm(format.lpcm, zstd_decompress(compressed), args...)
end
# Encode `samples` as plain LPCM, then compress those bytes at `format.level`.
function serialize_lpcm(format::LPCMZstFormat, samples::AbstractMatrix)
    uncompressed = serialize_lpcm(format.lpcm, samples)
    return zstd_compress(unsafe_vec_uint8(uncompressed), format.level)
end
# Wraps an `LPCMStream`; the constructors in this file build it around a zstd
# (de)compressor stream.
struct LPCMZstStream{L<:LPCMStream} <: AbstractLPCMStream
    # Inner stream that performs the actual LPCM (de)serialization.
    stream::L
end
# Layer a `ZstdDecompressorStream` over `io` and expose it as an LPCM stream.
function deserializing_lpcm_stream(format::LPCMZstFormat, io)
    decompressor = ZstdDecompressorStream(io)
    return LPCMZstStream(LPCMStream(format.lpcm, decompressor))
end
# Layer a `ZstdCompressorStream` (at `format.level`) over `io` and expose it as an
# LPCM stream.
function serializing_lpcm_stream(format::LPCMZstFormat, io)
    compressor = ZstdCompressorStream(io; level=format.level)
    return LPCMZstStream(LPCMStream(format.lpcm, compressor))
end
# Finalize the codec stream wrapped by `stream`. Returns `true` when the wrapped
# stream is a compressor and `false` otherwise (in which case it is closed).
function finalize_lpcm_stream(stream::LPCMZstStream)
    codec_io = stream.stream.io
    if !(codec_io isa ZstdCompressorStream)
        close(codec_io)
        return false
    end
    # Write `TranscodingStreams.TOKEN_END` and switch the `ZstdCompressorStream` to
    # mode `:close`; this flushes any remaining buffered data and finalizes the
    # underlying codec to free its resources without closing the underlying I/O object.
    write(codec_io, TranscodingStreams.TOKEN_END)
    TranscodingStreams.changemode!(codec_io, :close)
    return true
end
# Delegate to the wrapped `LPCMStream`, forwarding all positional arguments.
function deserialize_lpcm(stream::LPCMZstStream, args...)
    return deserialize_lpcm(stream.stream, args...)
end
# Delegate to the wrapped `LPCMStream`, forwarding all positional arguments.
function serialize_lpcm(stream::LPCMZstStream, args...)
    return serialize_lpcm(stream.stream, args...)
end