From 0d9303161c563f867a0f7ad69f3f497740bbd957 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Wed, 28 Sep 2016 11:31:43 -0400 Subject: [PATCH 01/11] initial WIP draft to demonstrate general idea --- README.md | 74 ++++++++++++++++++++++++++++ src/FileIO.jl | 4 ++ src/loadsave.jl | 127 +++++++++++++++++++++++++++++++++++------------- 3 files changed, 172 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 89d540d8..f4fa42f9 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,40 @@ s = query(io) # io is a stream will return a `File` or `Stream` object that also encodes the detected file format. +Sometimes you want to read or write files that are larger than your available +memory, or might be an unknown or infinite length (e.g. reading an audio or +video stream from a socket). In these cases it might not make sense to process +the whole file at once, but instead process it a chunk at a time. For these situations FileIO provides the `loadstreaming` and `savestreaming` functions, which return an object that you can `read` or `write`, rather than the file data itself. + +This would look something like: + +```jl +using FileIO +audio = loadstreaming("bigfile.wav") +try + while !eof(audio) + chunk = read(audio, 4096) # read 4096 frames + # process the chunk + end +finally + close(audio) +end +``` + +or use `do` syntax to auto-close the stream: + +```jl +using FileIO +loadstreaming("bigfile.wav") do audio + while !eof(audio) + chunk = read(audio, 4096) # read 4096 frames + # process the chunk + end +end +``` + +Note that in these cases you may want to use `read!` with a pre-allocated buffer for maximum efficiency. + ## Adding new formats You register a new format by adding `add_format(fmt, magic, @@ -139,6 +173,46 @@ automatically even if the code inside the `do` scope throws an error.) Conversely, `load(::Stream)` and `save(::Stream)` should not close the input stream. 
+`loadstreaming` and `savestreaming` use the same query mechanism, but return a decoded stream that users can `read` or `write`. You should also implement a `close` method on your reader or writer type. Just like with `load` and `save`, if the user provided a filename, your `close` method should be responsible for closing any streams you opened in order to read or write the file. If you are given a `Stream`, your `close` method should only do the clean up for your reader or writer type, not close the stream. + +```julia +struct WAVReader + io::IO + ownstream::Bool +end + +function read(reader::WAVReader, frames::Int) + # read and decode audio samples from reader.io +end + +function close(reader::WAVReader) + # do whatever cleanup the reader needs + if reader.ownstream + close(reader.io) + end +end +loadstreaming(f::File{format"WAV"}) = WAVReader(open(f), true) +loadstreaming(s::Stream{format"WAV"}) = WAVReader(s, false) +# FileIO has fallback functions that make these work using `do` syntax as well. +``` + +If you choose to implement `loadstreaming` and `savestreaming` in your package, +you can easily add `save` and `load` methods in the form of: + +```julia +function save(q::Formatted{format"WAV"}, data, args...; kwargs...) + savestreaming(q, args...; kwargs...) do stream + write(stream, data) + end +end + +function load(q::Formatted{format"WAV"}, args...; kwargs...) + loadstreaming(q, args...; kwargs...) do stream + read(stream) + end +end +``` + ## Help You can get an API overview by typing `?FileIO` at the REPL prompt. 
diff --git a/src/FileIO.jl b/src/FileIO.jl index 8cbf10de..b53bad46 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -17,9 +17,11 @@ export DataFormat, file_extension, info, load, + loadstreaming, magic, query, save, + savestreaming, skipmagic, stream, unknown @@ -40,7 +42,9 @@ include("registry.jl") - `load([filename|stream])`: read data in formatted file, inferring the format - `load(File(format"PNG",filename))`: specify the format manually +- `loadstreaming(f)`: similar to `load`, except that it returns an object that can be read from - `save(filename, data...)` for similar operations involving saving data +- `savestreaming(f)`: similar to `save`, except that it returns an object that can be written to - `io = open(f::File, args...)` opens a file - `io = stream(s::Stream)` returns the IOStream from the query object `s` diff --git a/src/loadsave.jl b/src/loadsave.jl index 3f2a6131..7342386a 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -49,8 +49,24 @@ the magic bytes are essential. - `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. - `load(f; options...)` passes keyword arguments on to the loader. """ -load(s::Union{AbstractString,IO}, args...; options...) = - load(query(s), args...; options...) +load + +""" +Some packages may implement a streaming API, where the contents of the file can +be read in chunks and processed, rather than all at once. Reading from these +higher-level streams should return a formatted object, like an image or chunk of +video or audio. + +- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. It returns a streaming +type that can be read from in chunks, rather than loading the whole contents all +at once +- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, +the magic bytes are essential. 
+- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +- `load(f; options...)` passes keyword arguments on to the loader. +""" +loadstreaming """ - `save(filename, data...)` saves the contents of a formatted file, @@ -58,8 +74,25 @@ trying to infer the format from `filename`. - `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. - `save(f, data...; options...)` passes keyword arguments on to the saver. """ -save(s::Union{AbstractString,IO}, data...; options...) = - save(query(s), data...; options...) +save + +""" +Some packages may implement a streaming API, where the contents of the file can +be written in chunks, rather than all at once. These higher-level streams should +accept formatted objects, like an image or chunk of video or audio. + +- `savestreaming(filename, data...)` saves the contents of a formatted file, +trying to infer the format from `filename`. +- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. +""" +savestreaming + + +for fn in (:load, :loadstreaming, :save, :savestreaming) + @eval $fn(s::Union{AbstractString,IO}, data...; options...) = + $fn(query(s), data...; options...) +end function save(s::Union{AbstractString,IO}; options...) data -> save(s, data; options...) @@ -73,51 +106,79 @@ function save{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; option $data...; $options...))) end +function savestreaming{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) + libraries = applicable_savers(df) + checked_import(libraries[1]) + eval(Main, :($savestreaming($Stream($(DataFormat{sym}), $s), + $data...; $options...))) + function save{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) 
libraries = applicable_savers(df) checked_import(libraries[1]) eval(Main, :($save($Stream($(DataFormat{sym}), $s), $data...; $options...))) + +function savestreaming{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; options...) + libraries = applicable_savers(df) + checked_import(libraries[1]) + eval(Main, :($savestreaming($File($(DataFormat{sym}), $f), + $data...; $options...))) +end + +# do-syntax for streaming IO +for fn in (:loadstreaming, :savestreaming) + @eval function $fn(f::Function, args...; kwargs...) + str = $fn(args...; kwargs...) + try + f(str) + finally + close(str) + end + end end # Fallbacks -function load{F}(q::Formatted{F}, args...; options...) - if unknown(q) - isfile(filename(q)) || open(filename(q)) # force systemerror - throw(UnknownFormat(q)) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.load), Library) - throw(LoaderError(string(library), "load not defined")) +for fn in (:load, :loadstreaming) + @eval function $fn{F}(q::Formatted{F}, args...; options...) + if unknown(q) + isfile(filename(q)) || open(filename(q)) # force systemerror + throw(UnknownFormat(q)) + end + libraries = applicable_loaders(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(LoaderError(string(library), "$fn not defined")) + end + return eval(Main, :($(Library.$fn)($q, $args...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.load)($q, $args...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "loading \"$(filename(q))\"") end - handle_exceptions(failures, "loading \"$(filename(q))\"") end -function save{F}(q::Formatted{F}, data...; options...) 
- unknown(q) && throw(UnknownFormat(q)) - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.save), Library) - throw(WriterError(string(library), "save not defined")) +for fn in (:save, :savestreaming) + @eval function $fn{F}(q::Formatted{F}, data...; options...) + unknown(q) && throw(UnknownFormat(q)) + libraries = applicable_savers(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(WriterError(string(library), "$fn not defined")) + end + return eval(Main, :($(Library.$fn)($q, $data...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.save)($q, $data...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "saving \"$(filename(q))\"") end - handle_exceptions(failures, "saving \"$(filename(q))\"") end function has_method_from(mt, Library) From aaec8815990c3ea59486ef778a2e14bdb82f813f Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Wed, 28 Sep 2016 11:57:05 -0400 Subject: [PATCH 02/11] fixes do-notation so it returns whatever the given function returns --- src/loadsave.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/loadsave.jl b/src/loadsave.jl index 7342386a..af10a8a6 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -130,10 +130,12 @@ for fn in (:loadstreaming, :savestreaming) @eval function $fn(f::Function, args...; kwargs...) str = $fn(args...; kwargs...) 
try - f(str) + ret = f(str) finally close(str) end + + ret end end From c2ca8dbde431feb92e71f1a51830cb73fbf419ad Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Wed, 28 Sep 2016 11:59:40 -0400 Subject: [PATCH 03/11] moves docstrings to after functions are defined --- src/loadsave.jl | 100 ++++++++++++++++++++++++------------------------ 1 file changed, 49 insertions(+), 51 deletions(-) diff --git a/src/loadsave.jl b/src/loadsave.jl index af10a8a6..f0c47a88 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -41,57 +41,9 @@ add_loader add_saver -""" -- `load(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. -- `load(strm)` loads from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. -- `load(f; options...)` passes keyword arguments on to the loader. -""" -load - -""" -Some packages may implement a streaming API, where the contents of the file can -be read in chunks and processed, rather than all at once. Reading from these -higher-level streams should return a formatted object, like an image or chunk of -video or audio. - -- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. It returns a streaming -type that can be read from in chunks, rather than loading the whole contents all -at once -- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. -- `load(f; options...)` passes keyword arguments on to the loader. -""" -loadstreaming - -""" -- `save(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. 
-- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `save(f, data...; options...)` passes keyword arguments on to the saver. -""" -save - -""" -Some packages may implement a streaming API, where the contents of the file can -be written in chunks, rather than all at once. These higher-level streams should -accept formatted objects, like an image or chunk of video or audio. - -- `savestreaming(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. -""" -savestreaming - - for fn in (:load, :loadstreaming, :save, :savestreaming) - @eval $fn(s::Union{AbstractString,IO}, data...; options...) = - $fn(query(s), data...; options...) + @eval $fn(s::@compat(Union{AbstractString,IO}), args...; options...) = + $fn(query(s), args...; options...) end function save(s::Union{AbstractString,IO}; options...) @@ -139,7 +91,6 @@ for fn in (:loadstreaming, :savestreaming) end end - # Fallbacks for fn in (:load, :loadstreaming) @eval function $fn{F}(q::Formatted{F}, args...; options...) @@ -183,6 +134,53 @@ for fn in (:save, :savestreaming) end end +""" +- `load(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. +- `load(strm)` loads from an `IOStream` or similar object. In this case, +the magic bytes are essential. +- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +- `load(f; options...)` passes keyword arguments on to the loader. +""" +load + +""" +Some packages may implement a streaming API, where the contents of the file can +be read in chunks and processed, rather than all at once. 
Reading from these +higher-level streams should return a formatted object, like an image or chunk of +video or audio. + +- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. It returns a streaming +type that can be read from in chunks, rather than loading the whole contents all +at once +- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, +the magic bytes are essential. +- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +- `load(f; options...)` passes keyword arguments on to the loader. +""" +loadstreaming + +""" +- `save(filename, data...)` saves the contents of a formatted file, +trying to infer the format from `filename`. +- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `save(f, data...; options...)` passes keyword arguments on to the saver. +""" +save + +""" +Some packages may implement a streaming API, where the contents of the file can +be written in chunks, rather than all at once. These higher-level streams should +accept formatted objects, like an image or chunk of video or audio. + +- `savestreaming(filename, data...)` saves the contents of a formatted file, +trying to infer the format from `filename`. +- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. 
+""" +savestreaming + function has_method_from(mt, Library) for m in mt if getmodule(m) == Library From 7761bbf1a89b7d4bc509e2c62f68a6e2cd3fd5c6 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Fri, 24 Nov 2017 11:54:41 -0500 Subject: [PATCH 04/11] some tweaks to clean up the diff --- README.md | 16 +++- src/loadsave.jl | 199 ++++++++++++++++++++++++++++-------------------- 2 files changed, 131 insertions(+), 84 deletions(-) diff --git a/README.md b/README.md index f4fa42f9..73eafbfc 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,10 @@ file format. Sometimes you want to read or write files that are larger than your available memory, or might be an unknown or infinite length (e.g. reading an audio or video stream from a socket). In these cases it might not make sense to process -the whole file at once, but instead process it a chunk at a time. For these situations FileIO provides the `loadstreaming` and `savestreaming` functions, which return an object that you can `read` or `write`, rather than the file data itself. +the whole file at once, but instead process it a chunk at a time. For these +situations FileIO provides the `loadstreaming` and `savestreaming` functions, +which return an object that you can `read` or `write`, rather than the file data +itself. This would look something like: @@ -69,7 +72,8 @@ do loadstreaming("bigfile.wav") audio end ``` -Note that in these cases you may want to use `read!` with a pre-allocated buffer for maximum efficiency. +Note that in these cases you may want to use `read!` with a pre-allocated buffer +for maximum efficiency. ## Adding new formats @@ -173,7 +177,13 @@ automatically even if the code inside the `do` scope throws an error.) Conversely, `load(::Stream)` and `save(::Stream)` should not close the input stream. -`loadstreaming` and `savestreaming` use the same query mechanism, but return a decoded stream that users can `read` or `write`. You should also implement a `close` method on your reader or writer type. 
Just like with `load` and `save`, if the user provided a filename, your `close` method should be responsible for closing any streams you opened in order to read or write the file. If you are given a `Stream`, your `close` method should only do the clean up for your reader or writer type, not close the stream. +`loadstreaming` and `savestreaming` use the same query mechanism, but return a +decoded stream that users can `read` or `write`. You should also implement a +`close` method on your reader or writer type. Just like with `load` and `save`, +if the user provided a filename, your `close` method should be responsible for +closing any streams you opened in order to read or write the file. If you are +given a `Stream`, your `close` method should only do the clean up for your +reader or writer type, not close the stream. ```julia struct WAVReader diff --git a/src/loadsave.jl b/src/loadsave.jl index f0c47a88..71d8e3ba 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -40,6 +40,52 @@ add_loader "`add_saver(fmt, :Package)` triggers `using Package` before saving format `fmt`" add_saver +""" +- `load(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. +- `load(strm)` loads from an `IOStream` or similar object. In this case, +the magic bytes are essential. +- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +- `load(f; options...)` passes keyword arguments on to the loader. +""" +load + +""" +Some packages may implement a streaming API, where the contents of the file can +be read in chunks and processed, rather than all at once. Reading from these +higher-level streams should return a formatted object, like an image or chunk of +video or audio. + +- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer +the format from `filename` and/or magic bytes in the file. 
It returns a streaming +type that can be read from in chunks, rather than loading the whole contents all +at once +- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, +the magic bytes are essential. +- `loadstreaming(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +- `loadstreaming(f; options...)` passes keyword arguments on to the loader. +""" +loadstreaming + +""" +- `save(filename, data...)` saves the contents of a formatted file, +trying to infer the format from `filename`. +- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `save(f, data...; options...)` passes keyword arguments on to the saver. +""" +save + +""" +Some packages may implement a streaming API, where the contents of the file can +be written in chunks, rather than all at once. These higher-level streams should +accept formatted objects, like an image or chunk of video or audio. + +- `savestreaming(filename, data...)` saves the contents of a formatted file, +trying to infer the format from `filename`. +- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. +""" +savestreaming for fn in (:load, :loadstreaming, :save, :savestreaming) @eval $fn(s::@compat(Union{AbstractString,IO}), args...; options...) = @@ -63,12 +109,14 @@ function savestreaming{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options.. checked_import(libraries[1]) eval(Main, :($savestreaming($Stream($(DataFormat{sym}), $s), $data...; $options...))) +end function save{sym}(df::Type{DataFormat{sym}}, s::IO, data...; options...) libraries = applicable_savers(df) checked_import(libraries[1]) eval(Main, :($save($Stream($(DataFormat{sym}), $s), $data...; $options...))) +end function savestreaming{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; options...) 
libraries = applicable_savers(df) @@ -82,104 +130,93 @@ for fn in (:loadstreaming, :savestreaming) @eval function $fn(f::Function, args...; kwargs...) str = $fn(args...; kwargs...) try - ret = f(str) + f(str) finally close(str) end - - ret end end # Fallbacks -for fn in (:load, :loadstreaming) - @eval function $fn{F}(q::Formatted{F}, args...; options...) - if unknown(q) - isfile(filename(q)) || open(filename(q)) # force systemerror - throw(UnknownFormat(q)) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.$fn), Library) - throw(LoaderError(string(library), "$fn not defined")) - end - return eval(Main, :($(Library.$fn)($q, $args...; $options...))) - catch e - push!(failures, (e, q)) + +# TODO: this definitely should be refactored to reduce duplication +function load{F}(q::Formatted{F}, args...; options...) + if unknown(q) + isfile(filename(q)) || open(filename(q)) # force systemerror + throw(UnknownFormat(q)) + end + libraries = applicable_loaders(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.load), Library) + throw(LoaderError(string(library), "load not defined")) end + return eval(Main, :($(Library.load)($q, $args...; $options...))) + catch e + push!(failures, (e, q)) end - handle_exceptions(failures, "loading \"$(filename(q))\"") end + handle_exceptions(failures, "loading \"$(filename(q))\"") end -for fn in (:save, :savestreaming) - @eval function $fn{F}(q::Formatted{F}, data...; options...) 
- unknown(q) && throw(UnknownFormat(q)) - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.$fn), Library) - throw(WriterError(string(library), "$fn not defined")) - end - return eval(Main, :($(Library.$fn)($q, $data...; $options...))) - catch e - push!(failures, (e, q)) + +function loadstreaming{F}(q::Formatted{F}, args...; options...) + if unknown(q) + isfile(filename(q)) || open(filename(q)) # force systemerror + throw(UnknownFormat(q)) + end + libraries = applicable_loaders(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.loadstreaming), Library) + throw(LoaderError(string(library), "loadstreaming not defined")) end + return eval(Main, :($(Library.loadstreaming)($q, $args...; $options...))) + catch e + push!(failures, (e, q)) end - handle_exceptions(failures, "saving \"$(filename(q))\"") end + handle_exceptions(failures, "opening \"$(filename(q))\" for streamed loading") end -""" -- `load(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. -- `load(strm)` loads from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. -- `load(f; options...)` passes keyword arguments on to the loader. -""" -load - -""" -Some packages may implement a streaming API, where the contents of the file can -be read in chunks and processed, rather than all at once. Reading from these -higher-level streams should return a formatted object, like an image or chunk of -video or audio. - -- `loadstreaming(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. 
It returns a streaming -type that can be read from in chunks, rather than loading the whole contents all -at once -- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. -- `load(f; options...)` passes keyword arguments on to the loader. -""" -loadstreaming - -""" -- `save(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `save(f, data...; options...)` passes keyword arguments on to the saver. -""" -save - -""" -Some packages may implement a streaming API, where the contents of the file can -be written in chunks, rather than all at once. These higher-level streams should -accept formatted objects, like an image or chunk of video or audio. +function save{F}(q::Formatted{F}, data...; options...) + unknown(q) && throw(UnknownFormat(q)) + libraries = applicable_savers(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.save), Library) + throw(WriterError(string(library), "save not defined")) + end + return eval(Main, :($(Library.save)($q, $data...; $options...))) + catch e + push!(failures, (e, q)) + end + end + handle_exceptions(failures, "saving \"$(filename(q))\"") +end -- `savestreaming(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. -""" -savestreaming +function savestreaming{F}(q::Formatted{F}, data...; options...) 
+ unknown(q) && throw(UnknownFormat(q)) + libraries = applicable_savers(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.savestreaming), Library) + throw(WriterError(string(library), "savestreaming not defined")) + end + return eval(Main, :($(Library.savestreaming)($q, $data...; $options...))) + catch e + push!(failures, (e, q)) + end + end + handle_exceptions(failures, "opening \"$(filename(q))\" for streamed saving") +end function has_method_from(mt, Library) for m in mt From 6c8e1fb21e50fc9ae0a75d4f248633ecf0e32cfe Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Fri, 24 Nov 2017 23:19:28 -0500 Subject: [PATCH 05/11] clarifies some comments, adds Dummy reader/writer structs --- src/FileIO.jl | 4 +-- src/loadsave.jl | 11 +++++--- test/loadsave.jl | 70 ++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 75 insertions(+), 10 deletions(-) diff --git a/src/FileIO.jl b/src/FileIO.jl index b53bad46..9e2c1eda 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -42,9 +42,9 @@ include("registry.jl") - `load([filename|stream])`: read data in formatted file, inferring the format - `load(File(format"PNG",filename))`: specify the format manually -- `loadstreaming(f)`: similar to `load`, except that it returns an object that can be read from +- `loadstreaming([filename|stream])`: similar to `load`, except that it returns an object that can be read from - `save(filename, data...)` for similar operations involving saving data -- `savestreaming(f)`: similar to `save`, except that it returns an object that can be written to +- `savestreaming([filename|stream])`: similar to `save`, except that it returns an object that can be written to - `io = open(f::File, args...)` opens a file - `io = stream(s::Stream)` returns the IOStream from the query object `s` diff --git a/src/loadsave.jl b/src/loadsave.jl index 71d8e3ba..66259ba7 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -87,16 
+87,19 @@ trying to infer the format from `filename`. """ savestreaming +# if a bare filename or IO stream are given, query for the format and dispatch +# to the formatted handlers below for fn in (:load, :loadstreaming, :save, :savestreaming) - @eval $fn(s::@compat(Union{AbstractString,IO}), args...; options...) = + @eval $fn(s::Union{AbstractString,IO}, args...; options...) = $fn(query(s), args...; options...) end +# return a save function, so you can do `thing_to_save |> save("filename.ext")` function save(s::Union{AbstractString,IO}; options...) data -> save(s, data; options...) end -# Forced format +# Allow format to be overridden with first argument function save{sym}(df::Type{DataFormat{sym}}, f::AbstractString, data...; options...) libraries = applicable_savers(df) checked_import(libraries[1]) @@ -137,7 +140,7 @@ for fn in (:loadstreaming, :savestreaming) end end -# Fallbacks +# Handlers for formatted files/streams # TODO: this definitely should be refactored to reduce duplication function load{F}(q::Formatted{F}, args...; options...) @@ -218,6 +221,8 @@ function savestreaming{F}(q::Formatted{F}, data...; options...) 
handle_exceptions(failures, "opening \"$(filename(q))\" for streamed saving") end +# returns true if the given method table includes a method defined by the given +# module, false otherwise function has_method_from(mt, Library) for m in mt if getmodule(m) == Library diff --git a/test/loadsave.jl b/test/loadsave.jl index 0be66992..be0ab992 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -61,15 +61,76 @@ module Dummy using FileIO -function load(file::File{format"DUMMY"}) +mutable struct DummyReader{IOtype} + stream::IOtype + ownstream::Bool + bytesleft::Int64 +end + +function DummyReader(stream, ownstream) + read(stream, 5) == magic(format"DUMMY") || error("wrong magic bytes") + DummyReader(stream, ownstream, read(stream, Int64)) +end + +function Base.read(stream::DummyReader, n) + toread = min(n, stream.bytesleft) + buf = read(stream.stream, toread) + stream.bytesleft -= length(buf) + buf +end + +Base.eof(stream::DummyReader) = stream.bytesleft == 0 || eof(stream.stream) +Base.close(stream::DummyReader) = stream.ownstream && close(stream.stream) + +mutable struct DummyWriter{IOtype} + stream::IOtype + ownstream::Bool + headerpos::Int + byteswritten::Int +end + +function DummyWriter(stream, ownstream) + write(stream, magic(format"DUMMY")) # Write the magic bytes + # store the position where we'll need to write the length + pos = position(stream) + # write a dummy length value + write(stream, 0xffffffffffffffff) + DummyWriter(stream, ownstream, pos, 0) +end + +function Base.write(stream::DummyWriter, data) + udata = convert(Vector{UInt8}, data) + n = write(stream.stream, udata) + stream.byteswritten += n + + n +end + +function Base.close(stream::DummyWriter) + here = position(stream.stream) + # go back and write the header + seek(stream.stream, stream.headerpos) + write(stream.stream, convert(Int64, stream.byteswritten)) + seek(stream.stream, here) + stream.ownstream && close(stream.stream) + + nothing +end + +loadstreaming(s::Stream{format"DUMMY"}) = 
DummyReader(s, false) +loadstreaming(file::File{format"DUMMY"}) = DummyReader(open(file), true) +savestreaming(s::Stream{format"DUMMY"}) = DummyWriter(s, false) +savestreaming(file::File{format"DUMMY"}) = DummyWriter(open(file, "w"), true) + +# we could implement `load` and `save` in terms of their streaming versions +function FileIO.load(file::File{format"DUMMY"}) open(file) do s - skipmagic(s) load(s) end end -function load(s::Stream{format"DUMMY"}) - # We're already past the magic bytes +function FileIO.load(s::Stream{format"DUMMY"}) + skipmagic(s) n = read(s, Int64) out = Vector{UInt8}(n) read!(s, out) @@ -133,7 +194,6 @@ add_saver(format"DUMMY", :Dummy) @test a == b b = open(query(fn)) do s - skipmagic(s) load(s) end @test a == b From 034d5b3c0fcc84a222299de5823e823c5c3122a5 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Fri, 24 Nov 2017 23:43:10 -0500 Subject: [PATCH 06/11] adds some tests for loadstreaming and savestreaming --- test/loadsave.jl | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/test/loadsave.jl b/test/loadsave.jl index be0ab992..ab30ca52 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -72,7 +72,7 @@ function DummyReader(stream, ownstream) DummyReader(stream, ownstream, read(stream, Int64)) end -function Base.read(stream::DummyReader, n) +function Base.read(stream::DummyReader, n=stream.bytesleft) toread = min(n, stream.bytesleft) buf = read(stream.stream, toread) stream.bytesleft -= length(buf) @@ -217,6 +217,38 @@ add_saver(format"DUMMY", :Dummy) end rm(fn) + # streaming I/O with filenames + fn = string(tempname(), ".dmy") + save(fn, a) + loadstreaming(fn) do reader + @test read(reader) == a + end + rm(fn) + savestreaming(fn) do writer + write(writer, a) + end + @test load(fn) == a + rm(fn) + + # streaming I/O with streams + save(fn, a) + open(fn) do io + loadstreaming(io) do reader + @test read(reader) == a + end + @test isopen(io) + end + rm(fn) + open(fn, "w") do io + 
savestreaming(format"DUMMY", io) do writer + write(writer, a) + end + @test isopen(io) + end + @test load(fn) == a + rm(fn) + + @test_throws Exception save("missing.fmt",5) end @@ -270,6 +302,8 @@ end @test typeof(query(fn)) == File{format"AmbigExt1"} rm(fn) + del_format(format"AmbigExt1") + del_format(format"AmbigExt2") end @testset "Absent file" begin From 4346a8922617682792355a1af7430bff24fcccf7 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Fri, 24 Nov 2017 23:59:54 -0500 Subject: [PATCH 07/11] adds a test for forced-format with filename --- test/loadsave.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/loadsave.jl b/test/loadsave.jl index ab30ca52..80b1e352 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -230,6 +230,14 @@ add_saver(format"DUMMY", :Dummy) @test load(fn) == a rm(fn) + # force format + fn = string(tempname(), ".dmy") + savestreaming(format"DUMMY", fn) do writer + write(writer, a) + end + @test load(fn) == a + rm(fn) + # streaming I/O with streams save(fn, a) open(fn) do io From ff9fe573382f807d40a1327c67496d2c2655e5d1 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Sat, 25 Nov 2017 00:03:07 -0500 Subject: [PATCH 08/11] fix for 32-bit, apparently position(s) always returns Int64 --- test/loadsave.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/loadsave.jl b/test/loadsave.jl index 80b1e352..b8c4470e 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -85,7 +85,7 @@ Base.close(stream::DummyReader) = stream.ownstream && close(stream.stream) mutable struct DummyWriter{IOtype} stream::IOtype ownstream::Bool - headerpos::Int + headerpos::Int64 byteswritten::Int end From 5fb37db10cf9e07a983b52150b83fbbd46630e68 Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Sat, 25 Nov 2017 00:29:56 -0500 Subject: [PATCH 09/11] some README fixes [ci skip] --- README.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 
73eafbfc..122c0dd7 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ try # process the chunk end finally - close(stream) + close(audio) end ``` @@ -64,7 +64,7 @@ or use `do` syntax to auto-close the stream: ```jl using FileIO -do loadstreaming("bigfile.wav") audio +loadstreaming("bigfile.wav") do audio while !eof(audio) chunk = read(audio, 4096) # read 4096 frames # process the chunk @@ -185,7 +185,7 @@ closing any streams you opened in order to read or write the file. If you are given a `Stream`, your `close` method should only do the clean up for your reader or writer type, not close the stream. -```julia +```jl struct WAVReader io::IO ownstream::Bool @@ -197,28 +197,26 @@ end function close(reader::WAVReader) # do whatever cleanup the reader needs - if reader.ownstream - close(reader.io) - end + reader.ownstream && close(reader.io) end -loadstreaming(f::File{format"WAV"}) = WAVReader(open(f), ownstream=true) -loadstreaming(s::Stream{format"WAV"}) = WAVReader(s, ownstream=false) +loadstreaming(f::File{format"WAV"}) = WAVReader(open(f), true) +loadstreaming(s::Stream{format"WAV"}) = WAVReader(s, false) # FileIO has fallback functions that make these work using `do` syntax as well. ``` If you choose to implement `loadstreaming` and `savestreaming` in your package, you can easily add `save` and `load` methods in the form of: -```julia +```jl function save(q::Formatted{format"WAV"}, data, args...; kwargs...) - savestreaming(args...; kwargs...) do stream + savestreaming(q, args...; kwargs...) do stream write(stream, data) end end function load(q::Formatted{format"WAV"}, args...; kwargs...) - savestreaming(args...; kwargs...) do stream - readall(stream) + savestreaming(q, args...; kwargs...) 
do stream + read(stream) end end ``` From 0f5930c79b9cf58cc2d6f7a8b26dce44b15d9a2f Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Sat, 25 Nov 2017 12:10:46 -0500 Subject: [PATCH 10/11] brings duplication down to previous levels --- src/loadsave.jl | 100 ++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 68 deletions(-) diff --git a/src/loadsave.jl b/src/loadsave.jl index 66259ba7..2da049d5 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -142,83 +142,47 @@ end # Handlers for formatted files/streams -# TODO: this definitely should be refactored to reduce duplication -function load{F}(q::Formatted{F}, args...; options...) - if unknown(q) - isfile(filename(q)) || open(filename(q)) # force systemerror - throw(UnknownFormat(q)) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.load), Library) - throw(LoaderError(string(library), "load not defined")) - end - return eval(Main, :($(Library.load)($q, $args...; $options...))) - catch e - push!(failures, (e, q)) +for fn in (:load, :loadstreaming) + @eval function $fn{F}(q::Formatted{F}, args...; options...) + if unknown(q) + isfile(filename(q)) || open(filename(q)) # force systemerror + throw(UnknownFormat(q)) end - end - handle_exceptions(failures, "loading \"$(filename(q))\"") -end - -function loadstreaming{F}(q::Formatted{F}, args...; options...) 
- if unknown(q) - isfile(filename(q)) || open(filename(q)) # force systemerror - throw(UnknownFormat(q)) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.loadstreaming), Library) - throw(LoaderError(string(library), "loadstreaming not defined")) + libraries = applicable_loaders(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(LoaderError(string(library), "$($fn) not defined")) + end + return eval(Main, :($(Library.$fn)($q, $args...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.loadstreaming)($q, $args...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "loading \"$(filename(q))\"") end - handle_exceptions(failures, "opening \"$(filename(q))\" for streamed loading") end -function save{F}(q::Formatted{F}, data...; options...) - unknown(q) && throw(UnknownFormat(q)) - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.save), Library) - throw(WriterError(string(library), "save not defined")) - end - return eval(Main, :($(Library.save)($q, $data...; $options...))) - catch e - push!(failures, (e, q)) - end - end - handle_exceptions(failures, "saving \"$(filename(q))\"") -end - -function savestreaming{F}(q::Formatted{F}, data...; options...) - unknown(q) && throw(UnknownFormat(q)) - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if !has_method_from(methods(Library.savestreaming), Library) - throw(WriterError(string(library), "savestreaming not defined")) +for fn in (:save, :savestreaming) + @eval function $fn{F}(q::Formatted{F}, data...; options...) 
+ unknown(q) && throw(UnknownFormat(q)) + libraries = applicable_savers(q) + failures = Any[] + for library in libraries + try + Library = checked_import(library) + if !has_method_from(methods(Library.$fn), Library) + throw(WriterError(string(library), "$($fn) not defined")) + end + return eval(Main, :($(Library.$fn)($q, $data...; $options...))) + catch e + push!(failures, (e, q)) end - return eval(Main, :($(Library.savestreaming)($q, $data...; $options...))) - catch e - push!(failures, (e, q)) end + handle_exceptions(failures, "saving \"$(filename(q))\"") end - handle_exceptions(failures, "opening \"$(filename(q))\" for streamed saving") end # returns true if the given method table includes a method defined by the given From 239af175f3ebbfbd0e5ec4e438e0ed82fa0b63bb Mon Sep 17 00:00:00 2001 From: Spencer Russell Date: Sat, 17 Mar 2018 23:35:05 -0400 Subject: [PATCH 11/11] fixes some documentation issues spotted by @timholy --- README.md | 15 +++++++++------ src/loadsave.jl | 24 +++++++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 122c0dd7..499b00d0 100644 --- a/README.md +++ b/README.md @@ -168,8 +168,9 @@ end Note that these are `load` and `save`, **not** `FileIO.load` and `FileIO.save`. Because a given format might have multiple packages that are capable of reading it, FileIO will dispatch to these using module-scoping, e.g., `SomePkg.load(args...)`. -Consequently, **packages should define "private" `load` and `save` methods, and -not extend (import) FileIO's**. +Consequently, **packages should define "private" `load` and `save` methods (also +`loadstreaming` and `savestreaming` if you implement them), and not extend +(import) FileIO's**. `load(::File)` and `save(::File)` should close any streams they open. 
(If you use the `do` syntax, this happens for you @@ -191,17 +192,19 @@ struct WAVReader ownstream::Bool end -function read(reader::WAVReader, frames::Int) +function Base.read(reader::WAVReader, frames::Int) # read and decode audio samples from reader.io end -function close(reader::WAVReader) +function Base.close(reader::WAVReader) # do whatever cleanup the reader needs reader.ownstream && close(reader.io) end + +# FileIO has fallback functions that make these work using `do` syntax as well, +# and will automatically call `close` on the returned object. loadstreaming(f::File{format"WAV"}) = WAVReader(open(f), true) loadstreaming(s::Stream{format"WAV"}) = WAVReader(s, false) -# FileIO has fallback functions that make these work using `do` syntax as well. ``` If you choose to implement `loadstreaming` and `savestreaming` in your package, @@ -215,7 +218,7 @@ function save(q::Formatted{format"WAV"}, data, args...; kwargs...) end function load(q::Formatted{format"WAV"}, args...; kwargs...) - savestreaming(q, args...; kwargs...) do stream + loadstreaming(q, args...; kwargs...) do stream read(stream) end end diff --git a/src/loadsave.jl b/src/loadsave.jl index 2da049d5..3cec1518 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -44,8 +44,10 @@ add_saver - `load(filename)` loads the contents of a formatted file, trying to infer the format from `filename` and/or magic bytes in the file. - `load(strm)` loads from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. +there is no filename extension, so we rely on the magic bytes for format +identification. +- `load(File(format"PNG", filename))` specifies the format directly, and bypasses inference. +- `load(Stream(format"PNG", io))` specifies the format directly, and bypasses inference. - `load(f; options...)` passes keyword arguments on to the loader. """ load @@ -60,10 +62,14 @@ video or audio. 
the format from `filename` and/or magic bytes in the file. It returns a streaming type that can be read from in chunks, rather than loading the whole contents all at once -- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, -the magic bytes are essential. -- `load(File(format"PNG",filename))` specifies the format directly, and bypasses inference. -- `load(f; options...)` passes keyword arguments on to the loader. +- `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. +In this case, there is no filename extension, so we rely on the magic bytes +for format identification. +- `loadstreaming(File(format"WAV",filename))` specifies the format directly, and +bypasses inference. +- `loadstreaming(Stream(format"WAV", io))` specifies the format directly, and +bypasses inference. +- `loadstreaming(f; options...)` passes keyword arguments on to the loader. """ loadstreaming @@ -71,6 +77,7 @@ loadstreaming - `save(filename, data...)` saves the contents of a formatted file, trying to infer the format from `filename`. - `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `save(File(format"PNG",filename), data...)` specifies the format directly, and bypasses inference. - `save(f, data...; options...)` passes keyword arguments on to the saver. """ save @@ -82,7 +89,10 @@ accept formatted objects, like an image or chunk of video or audio. - `savestreaming(filename, data...)` saves the contents of a formatted file, trying to infer the format from `filename`. -- `savestreaming(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. +- `savestreaming(File(format"WAV",filename))` specifies the format directly, and +bypasses inference. +- `savestreaming(Stream(format"WAV", io))` specifies the format directly, and +bypasses inference. - `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. """ savestreaming