diff --git a/README.md b/README.md index 89d540d8..499b00d0 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,44 @@ s = query(io) # io is a stream will return a `File` or `Stream` object that also encodes the detected file format. +Sometimes you want to read or write files that are larger than your available +memory, or that might be of unknown or infinite length (e.g. reading an audio or +video stream from a socket). In these cases it might not make sense to process +the whole file at once, but instead process it a chunk at a time. For these +situations FileIO provides the `loadstreaming` and `savestreaming` functions, +which return an object that you can `read` or `write`, rather than the file data +itself. + +This would look something like: + +```jl +using FileIO +audio = loadstreaming("bigfile.wav") +try + while !eof(audio) + chunk = read(audio, 4096) # read 4096 frames + # process the chunk + end +finally + close(audio) +end +``` + +or use `do` syntax to auto-close the stream: + +```jl +using FileIO +loadstreaming("bigfile.wav") do audio + while !eof(audio) + chunk = read(audio, 4096) # read 4096 frames + # process the chunk + end +end +``` + +Note that in these cases you may want to use `read!` with a pre-allocated buffer +for maximum efficiency. + ## Adding new formats You register a new format by adding `add_format(fmt, magic, @@ -130,8 +168,9 @@ end Note that these are `load` and `save`, **not** `FileIO.load` and `FileIO.save`. Because a given format might have multiple packages that are capable of reading it, FileIO will dispatch to these using module-scoping, e.g., `SomePkg.load(args...)`. -Consequently, **packages should define "private" `load` and `save` methods, and -not extend (import) FileIO's**. +Consequently, **packages should define "private" `load` and `save` methods (also +`loadstreaming` and `savestreaming` if you implement them), and not extend +(import) FileIO's**. `load(::File)` and `save(::File)` should close any streams they open. 
(If you use the `do` syntax, this happens for you @@ -139,6 +178,52 @@ automatically even if the code inside the `do` scope throws an error.) Conversely, `load(::Stream)` and `save(::Stream)` should not close the input stream. +`loadstreaming` and `savestreaming` use the same query mechanism, but return a +decoded stream that users can `read` or `write`. You should also implement a +`close` method on your reader or writer type. Just like with `load` and `save`, +if the user provided a filename, your `close` method should be responsible for +closing any streams you opened in order to read or write the file. If you are +given a `Stream`, your `close` method should only do the clean up for your +reader or writer type, not close the stream. + +```jl +struct WAVReader + io::IO + ownstream::Bool +end + +function Base.read(reader::WAVReader, frames::Int) + # read and decode audio samples from reader.io +end + +function Base.close(reader::WAVReader) + # do whatever cleanup the reader needs + reader.ownstream && close(reader.io) +end + +# FileIO has fallback functions that make these work using `do` syntax as well, +# and will automatically call `close` on the returned object. +loadstreaming(f::File{format"WAV"}) = WAVReader(open(f), true) +loadstreaming(s::Stream{format"WAV"}) = WAVReader(s, false) +``` + +If you choose to implement `loadstreaming` and `savestreaming` in your package, +you can easily add `save` and `load` methods in the form of: + +```jl +function save(q::Formatted{format"WAV"}, data, args...; kwargs...) + savestreaming(q, args...; kwargs...) do stream + write(stream, data) + end +end + +function load(q::Formatted{format"WAV"}, args...; kwargs...) + loadstreaming(q, args...; kwargs...) do stream + read(stream) + end +end +``` + ## Help You can get an API overview by typing `?FileIO` at the REPL prompt. 
diff --git a/src/FileIO.jl b/src/FileIO.jl index 8cbf10de..9e2c1eda 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -17,9 +17,11 @@ export DataFormat, file_extension, info, load, + loadstreaming, magic, query, save, + savestreaming, skipmagic, stream, unknown @@ -40,7 +42,9 @@ include("registry.jl") - `load([filename|stream])`: read data in formatted file, inferring the format - `load(File(format"PNG",filename))`: specify the format manually +- `loadstreaming([filename|stream])`: similar to `load`, except that it returns an object that can be read from - `save(filename, data...)` for similar operations involving saving data +- `savestreaming([filename|stream])`: similar to `save`, except that it returns an object that can be written to - `io = open(f::File, args...)` opens a file - `io = stream(s::Stream)` returns the IOStream from the query object `s` diff --git a/src/loadsave.jl b/src/loadsave.jl index 3f2a6131..3cec1518 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -40,32 +40,76 @@ add_loader "`add_saver(fmt, :Package)` triggers `using Package` before saving format `fmt`" add_saver - """ - `load(filename)` loads the contents of a formatted file, trying to infer the format from `filename` and/or magic bytes in the file. - `load(strm)` loads from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +there is no filename extension, so we rely on the magic bytes for format +identification. +- `load(File(format"PNG", filename))` specifies the format directly, and bypasses inference. +- `load(Stream(format"PNG", io))` specifies the format directly, and bypasses inference. - `load(f; options...)` passes keyword arguments on to the loader. """ -load(s::Union{AbstractString,IO}, args...; options...) = - load(query(s), args...; options...) 
+load + +""" +Some packages may implement a streaming API, where the contents of the file can +be read in chunks and processed, rather than all at once. Reading from these +higher-level streams should return a formatted object, like an image or chunk of +video or audio. + +- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. It returns a streaming +type that can be read from in chunks, rather than loading the whole contents all +at once. +- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. +In this case, there is no filename extension, so we rely on the magic bytes +for format identification. +- `loadstreaming(File(format"WAV",filename))` specifies the format directly, and +bypasses inference. +- `loadstreaming(Stream(format"WAV", io))` specifies the format directly, and +bypasses inference. +- `loadstreaming(f; options...)` passes keyword arguments on to the loader. +""" +loadstreaming """ - `save(filename, data...)` saves the contents of a formatted file, trying to infer the format from `filename`. - `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `save(File(format"PNG",filename), data...)` specifies the format directly, and bypasses inference. - `save(f, data...; options...)` passes keyword arguments on to the saver. """ -save(s::Union{AbstractString,IO}, data...; options...) = - save(query(s), data...; options...) +save + +""" +Some packages may implement a streaming API, where the contents of the file can +be written in chunks, rather than all at once. These higher-level streams should +accept formatted objects, like an image or chunk of video or audio. + +- `savestreaming(filename, data...)` opens a formatted file for writing, +trying to infer the format from `filename`. It returns a streaming type that can be written to in chunks, rather than saving the whole contents all at once. +- `savestreaming(File(format"WAV",filename))` specifies the format directly, and +bypasses inference. 
+- `savestreaming(Stream(format"WAV", io))` specifies the format directly, and +bypasses inference. +- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. +""" +savestreaming + +# if a bare filename or IO stream is given, query for the format and dispatch +# to the formatted handlers below +for fn in (:load, :loadstreaming, :save, :savestreaming) + @eval $fn(s::Union{AbstractString,IO}, args...; options...) = + $fn(query(s), args...; options...) +end +# return a save function, so you can do `thing_to_save |> save("filename.ext")` function save(s::Union{AbstractString,IO}; options...) data -> save(s, data; options...) end -# Forced format +# Allow format to be overridden with first argument function save{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; options...) libraries = applicable_savers(df) checked_import(libraries[1]) @@ -73,6 +117,13 @@ function save{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; option $data...; $options...))) end +function savestreaming{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) + libraries = applicable_savers(df) + checked_import(libraries[1]) + eval(Main, :($savestreaming($Stream($(DataFormat{sym}), $s), + $data...; $options...))) +end + function save{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) libraries = applicable_savers(df) checked_import(libraries[1]) @@ -80,46 +131,72 @@ function save{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) $data...; $options...))) end +function savestreaming{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; options...) + libraries = applicable_savers(df) + checked_import(libraries[1]) + eval(Main, :($savestreaming($File($(DataFormat{sym}), $f), + $data...; $options...))) +end -# Fallbacks -function load{F}(q::Formatted{F}, args...; options...) 
- if unknown(q) - isfile(filename(q)) || open(filename(q)) # force systemerror - throw(UnknownFormat(q)) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries +# do-syntax for streaming IO +for fn in (:loadstreaming, :savestreaming) + @eval function $fn(f::Function, args...; kwargs...) + str = $fn(args...; kwargs...) try - Library = checked_import(library) - if !has_method_from(methods(Library.load), Library) - throw(LoaderError(string(library), "load not defined")) + f(str) + finally + close(str) + end + end +end + +# Handlers for formatted files/streams + +for fn in (:load, :loadstreaming) + @eval function $fn{F}(q::Formatted{F}, args...; options...) + if unknown(q) + isfile(filename(q)) || open(filename(q)) # force systemerror + throw(UnknownFormat(q)) + end + libraries = applicable_loaders(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(LoaderError(string(library), "$($fn) not defined")) + end + return eval(Main, :($(Library.$fn)($q, $args...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.load)($q, $args...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "loading \"$(filename(q))\"") end - handle_exceptions(failures, "loading \"$(filename(q))\"") end -function save{F}(q::Formatted{F}, data...; options...) - unknown(q) && throw(UnknownFormat(q)) - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.save), Library) - throw(WriterError(string(library), "save not defined")) + +for fn in (:save, :savestreaming) + @eval function $fn{F}(q::Formatted{F}, data...; options...) 
+ unknown(q) && throw(UnknownFormat(q)) + libraries = applicable_savers(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(WriterError(string(library), "$($fn) not defined")) + end + return eval(Main, :($(Library.$fn)($q, $data...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.save)($q, $data...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "saving \"$(filename(q))\"") end - handle_exceptions(failures, "saving \"$(filename(q))\"") end +# returns true if the given method table includes a method defined by the given +# module, false otherwise function has_method_from(mt, Library) for m in mt if getmodule(m) == Library diff --git a/test/loadsave.jl b/test/loadsave.jl index 0be66992..b8c4470e 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -61,15 +61,76 @@ module Dummy using FileIO -function load(file::File{format"DUMMY"}) +mutable struct DummyReader{IOtype} + stream::IOtype + ownstream::Bool + bytesleft::Int64 +end + +function DummyReader(stream, ownstream) + read(stream, 5) == magic(format"DUMMY") || error("wrong magic bytes") + DummyReader(stream, ownstream, read(stream, Int64)) +end + +function Base.read(stream::DummyReader, n=stream.bytesleft) + toread = min(n, stream.bytesleft) + buf = read(stream.stream, toread) + stream.bytesleft -= length(buf) + buf +end + +Base.eof(stream::DummyReader) = stream.bytesleft == 0 || eof(stream.stream) +Base.close(stream::DummyReader) = stream.ownstream && close(stream.stream) + +mutable struct DummyWriter{IOtype} + stream::IOtype + ownstream::Bool + headerpos::Int64 + byteswritten::Int +end + +function DummyWriter(stream, ownstream) + write(stream, magic(format"DUMMY")) # Write the magic bytes + # store the position where we'll need to write the length + pos = position(stream) + # write a dummy length value + write(stream, 0xffffffffffffffff) 
+ DummyWriter(stream, ownstream, pos, 0) +end + +function Base.write(stream::DummyWriter, data) + udata = convert(Vector{UInt8}, data) + n = write(stream.stream, udata) + stream.byteswritten += n + + n +end + +function Base.close(stream::DummyWriter) + here = position(stream.stream) + # go back and write the header + seek(stream.stream, stream.headerpos) + write(stream.stream, convert(Int64, stream.byteswritten)) + seek(stream.stream, here) + stream.ownstream && close(stream.stream) + + nothing +end + +loadstreaming(s::Stream{format"DUMMY"}) = DummyReader(s, false) +loadstreaming(file::File{format"DUMMY"}) = DummyReader(open(file), true) +savestreaming(s::Stream{format"DUMMY"}) = DummyWriter(s, false) +savestreaming(file::File{format"DUMMY"}) = DummyWriter(open(file, "w"), true) + +# we could implement `load` and `save` in terms of their streaming versions +function FileIO.load(file::File{format"DUMMY"}) open(file) do s - skipmagic(s) load(s) end end -function load(s::Stream{format"DUMMY"}) - # We're already past the magic bytes +function FileIO.load(s::Stream{format"DUMMY"}) + skipmagic(s) n = read(s, Int64) out = Vector{UInt8}(n) read!(s, out) @@ -133,7 +194,6 @@ add_saver(format"DUMMY", :Dummy) @test a == b b = open(query(fn)) do s - skipmagic(s) load(s) end @test a == b @@ -157,6 +217,46 @@ add_saver(format"DUMMY", :Dummy) end rm(fn) + # streaming I/O with filenames + fn = string(tempname(), ".dmy") + save(fn, a) + loadstreaming(fn) do reader + @test read(reader) == a + end + rm(fn) + savestreaming(fn) do writer + write(writer, a) + end + @test load(fn) == a + rm(fn) + + # force format + fn = string(tempname(), ".dmy") + savestreaming(format"DUMMY", fn) do writer + write(writer, a) + end + @test load(fn) == a + rm(fn) + + # streaming I/O with streams + save(fn, a) + open(fn) do io + loadstreaming(io) do reader + @test read(reader) == a + end + @test isopen(io) + end + rm(fn) + open(fn, "w") do io + savestreaming(format"DUMMY", io) do writer + write(writer, a) + 
end + @test isopen(io) + end + @test load(fn) == a + rm(fn) + + @test_throws Exception save("missing.fmt",5) end @@ -210,6 +310,8 @@ end @test typeof(query(fn)) == File{format"AmbigExt1"} rm(fn) + del_format(format"AmbigExt1") + del_format(format"AmbigExt2") end @testset "Absent file" begin