Skip to content

Commit

Permalink
cleanup API
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyndon White committed May 7, 2018
1 parent 90d8227 commit caef6a1
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 89 deletions.
80 changes: 6 additions & 74 deletions src/DataDeps.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,81 +5,12 @@ using HTTP
using Reexport
@reexport using SHA

export RegisterDataDep, @datadep_str, unpack
export DataDep, ManualDataDep
export register, resolve, @datadep_str
export unpack

# Common supertype of the data dependency kinds declared below.
abstract type AbstractDataDep end

"""
    ManualDataDep(name, message)

A data dependency that is described only by its `name` and a `message`;
it carries no remote path and no fetch hooks.
"""
struct ManualDataDep <: AbstractDataDep
name::String
message::String
end

# A data dependency that carries a remote path, a hash and two hook fields.
# The type parameters H, R, F and P let `hash`, `remotepath`, `fetch_method` and
# `post_fetch_method` hold values of any type while the fields stay concretely typed.
# NOTE(review): the field order below is the argument order of the default
# positional constructor, so reordering fields changes how instances are built.
struct DataDep{H, R, F, P} <: AbstractDataDep
name::String
remotepath::R
hash::H
fetch_method::F
post_fetch_method::P
extra_message::String
end

# String macro: the first path component names a registered datadep, the rest
# (if any) is a path inside it. The result is whatever `resolve` returns.
macro datadep_str(path)
    quote
        components = splitpath($(esc(path)))
        depname = first(components)
        # Everything after the leading name addresses a location inside the datadep.
        subpath = if length(components) > 1
            joinpath(Iterators.drop(components, 1)...)
        else
            ""
        end
        resolve(registry[depname], subpath, @__FILE__)
    end
end

"""
resolve(datadep, inner_filepath, calling_filepath)
Returns a path to the folder containing the datadep (or to `inner_filepath` inside that folder),
even if that means downloading the dependency and putting it in there.
- `inner_filepath` is the path to the file within the data dir
- `calling_filepath` is a path to the file where this is being invoked from
This is basically the function that lives behind the string macro `datadep"DepName/inner_filepath"`.
"""
function resolve(datadep::AbstractDataDep, inner_filepath, calling_filepath)::String
    # Keep trying until the requested path is readable or a chosen action throws.
    while true
        depdir = _resolve(datadep, calling_filepath)
        target = joinpath(depdir, inner_filepath)

        # Success: hand back the path with symlinks resolved, for maximum
        # compatibility with external applications.
        can_read_file(target) && return realpath(target)

        # Something has gone wrong: report it and let `input_choice` run one of the actions.
        warn("DataDep $(datadep.name) found at \"$(depdir)\". But could not read file at \"$(target)\".")
        warn("Something has gone wrong. What would you like to do?")
        input_choice(
            ('A', "Abort -- this will error out",
                () -> error("Aborted resolving data dependency, program could not continue.")),
            ('R', "Retry -- do this after fixing the problem outside of this script",
                () -> nothing),  # nothing to do, the loop simply goes round again
            ('X', "Remove directory and retry -- will retrigger download if there isn't another copy elsewhere",
                () -> rm(depdir, force=true, recursive=true))
        )
    end
end

"The core of the resolve function without any user friendly stuff, returns the directory"
function _resolve(datadep::AbstractDataDep, calling_filepath)::String
    located = try_determine_load_path(datadep.name, calling_filepath)
    # `located` is a nullable: use its value when present, otherwise defer to `handle_missing`.
    isnull(located) && return handle_missing(datadep, calling_filepath)
    return get(located)
end

include("types.jl")

include("util.jl")
include("registration.jl")
Expand All @@ -88,10 +19,11 @@ include("filename_solving.jl")
include("locations.jl")
include("verification.jl")


include("resolution.jl")
include("resolution_automatic.jl")
include("resolution_manual.jl")

include("helpers.jl")
include("deprecations.jl")

end # module
23 changes: 23 additions & 0 deletions src/deprecations.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# This file is a part of DataDeps.jl. License is MIT.

# Deprecation shim: the old `RegisterDataDep(name, message, remotepath, [hash]; ...)`
# call form is forwarded to `register(DataDep(...))` with the same positional and
# keyword arguments.
# NOTE(review): `@deprecate` prints both expressions in its warning text, so
# editing either call below also changes the message users see.
@deprecate(
RegisterDataDep(name::String,
message::String,
remotepath,
hash=nothing;
fetch_method=download,
post_fetch_method=identity,
),
register(DataDep(name::String,
message::String,
remotepath,
hash;
fetch_method=fetch_method,
post_fetch_method=post_fetch_method)
)
)

# Deprecation shim for the two-argument form: forwards
# `RegisterDataDep(name, message)` to `register(ManualDataDep(name, message))`.
@deprecate(
RegisterDataDep(name::String, message::String),
register(ManualDataDep(name, message))
)
23 changes: 8 additions & 15 deletions src/registration.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,19 @@

const registry = Dict{String, AbstractDataDep}()

"""
    register(datadep::AbstractDataDep)

Store `datadep` in the module-level `registry` under `datadep.name` and return it.

A warning is emitted when an entry with the same name already exists (it is then
replaced). An error is raised, and nothing is stored, when the name is rejected
by `is_valid_name`.
"""
function register(datadep::AbstractDataDep)
    name = datadep.name
    if haskey(registry, name)
        warn("Over-writing registration of the datadep: $name")
    end
    # Reject the name before touching the registry so an invalid entry is never stored.
    if !is_valid_name(name)
        error(name, " is not a valid name for a datadep. Valid names must be legal foldernames on Windows.")
    end

    registry[name] = datadep
    return datadep
end


"""
is_valid_name(name)
Expand All @@ -31,5 +24,5 @@ This basically means it must be a valid folder name on windows.
"""
function is_valid_name(name)
    # Characters this check rejects outright.
    forbidden = "\\/:*?<>|"
    # Valid only when no character is forbidden and none is a control character.
    # Uses the exported `iscntrl`; the former `Base.UTF8prox.iscntrl` reference named a
    # module that does not exist and threw as soon as the first check passed.
    return !any(c -> c in forbidden || iscntrl(c), name)
end
74 changes: 74 additions & 0 deletions src/resolution.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# This file is a part of DataDeps.jl. License is MIT.

"""
`datadep"Name"` or `datadep"Name/file"`
Use this just like you would a file path, except that you can refer by name to the datadep.
The name alone will resolve to the corresponding folder.
Even if that means it has to be downloaded first.
Adding a path within it functions as expected.
"""
macro datadep_str(path)
    quote
        components = splitpath($(esc(path)))
        depname = first(components)
        # Everything after the leading name addresses a location inside the datadep.
        subpath = if length(components) > 1
            joinpath(Iterators.drop(components, 1)...)
        else
            ""
        end
        # Lookup by name is left to the string method of `resolve`.
        resolve(depname, subpath, @__FILE__)
    end
end




"""
resolve(datadep, inner_filepath, calling_filepath)
Returns a path to the folder containing the datadep (or to `inner_filepath` inside that folder),
even if that means downloading the dependency and putting it in there.
- `inner_filepath` is the path to the file within the data dir
- `calling_filepath` is a path to the file where this is being invoked from
This is basically the function that lives behind the string macro `datadep"DepName/inner_filepath"`.
"""
function resolve(datadep::AbstractDataDep, inner_filepath, calling_filepath)::String
    # Keep trying until the requested path is readable or a chosen action throws.
    while true
        depdir = _resolve(datadep, calling_filepath)
        target = joinpath(depdir, inner_filepath)

        # Success: hand back the path with symlinks resolved, for maximum
        # compatibility with external applications.
        can_read_file(target) && return realpath(target)

        # Something has gone wrong: report it and let `input_choice` run one of the actions.
        warn("DataDep $(datadep.name) found at \"$(depdir)\". But could not read file at \"$(target)\".")
        warn("Something has gone wrong. What would you like to do?")
        input_choice(
            ('A', "Abort -- this will error out",
                () -> error("Aborted resolving data dependency, program could not continue.")),
            ('R', "Retry -- do this after fixing the problem outside of this script",
                () -> nothing),  # nothing to do, the loop simply goes round again
            ('X', "Remove directory and retry -- will retrigger download if there isn't another copy elsewhere",
                () -> rm(depdir, force=true, recursive=true))
        )
    end
end

"""
Passing resolve a string rather than an actual datadep object works to look up
the data dep from the registry.
This is useful for programmatic downloading.
"""
function resolve(datadep_name::AbstractString, inner_filepath, calling_filepath)::String
    # Indexing the registry throws if no datadep was registered under this name.
    datadep = registry[datadep_name]
    return resolve(datadep, inner_filepath, calling_filepath)
end

"The core of the resolve function without any user friendly stuff, returns the directory"
function _resolve(datadep::AbstractDataDep, calling_filepath)::String
    located = try_determine_load_path(datadep.name, calling_filepath)
    # `located` is a nullable: use its value when present, otherwise defer to `handle_missing`.
    isnull(located) && return handle_missing(datadep, calling_filepath)
    return get(located)
end
83 changes: 83 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# This file is a part of DataDeps.jl. License is MIT.


# Common supertype of the data dependency kinds declared below.
abstract type AbstractDataDep end

"""
    ManualDataDep(name, message)

A DataDep for when the installation needs to be handled manually.
This can be done via Pkg/git if you put the dependency into the package repo's `/deps/data` directory.
More generally, `message` should give instructions on how to set up the data.
"""
struct ManualDataDep <: AbstractDataDep
name::String
message::String
end

# A data dependency that carries a remote path, a hash and two hook fields.
# The type parameters H, R, F and P let `hash`, `remotepath`, `fetch_method` and
# `post_fetch_method` hold values of any type while the fields stay concretely typed.
# NOTE(review): the field order below is the argument order of the default
# positional constructor, so reordering fields changes how instances are built.
struct DataDep{H, R, F, P} <: AbstractDataDep
name::String
remotepath::R
hash::H
fetch_method::F
post_fetch_method::P
extra_message::String
end



"""
```
DataDep(
name::String,
message::String,
remote_path::Union{String,Vector{String}...},
[checksum::Union{String,Vector{String}...},]; # Optional, if not provided will generate
# keyword args (Optional):
fetch_method=download # (remote_filepath, local_filepath)->Any
post_fetch_method=identity # (local_filepath)->Any
)
```
#### Required Fields
- *Name*: the name used to refer to this datadep, corresponds to a folder name where it will be stored
- It can have spaces or any other character that is allowed in a Windows filestring (which is a strict subset of the restriction for unix filenames).
- *Message*: A message displayed to the user when they are asked if they want to download it.
- This is normally used to give a link to the original source of the data, a paper to be cited etc.
- *remote_path*: where to fetch the data from. Normally a string (or strings) containing a URL
- This is usually a string, or a vector of strings (or a vector of vector... see [Recursive Structure](Recursive Structure) below)
#### Optional Fields
- *checksum* this is very flexible, it is used to check the files downloaded correctly
- By far the most common use is to just provide a SHA256 sum as a hex-string for the files
- If not provided, then a warning message with the SHA256 sum is displayed. This is to help package devs work out the sum for their files, without using an external tool.
- If you want to use a different hashing algorithm, then you can provide a tuple `(hashfun, targethex)`
- `hashfun` should be a function which takes an IOStream, and returns a `Vector{UInt8}`.
- Such as any of the functions from [SHA.jl](https://github.com/staticfloat/SHA.jl), eg `sha3_384`, `sha2_512`
- or `md5` from [MD5.jl](https://github.com/oxinabox/MD5.jl)
- If you want to use a different hashing algorithm, but don't know the sum, you can provide just the `hashfun` and a warning message will be displayed, giving the correct tuple of `(hashfun, targethex)` that should be added to the registration block.
- If you don't want to provide a checksum, because your data can change pass in the type `Any` which will suppress the warning messages. (But see above warnings about "what if my data is dynamic")
- Can take a vector of checksums, being one for each file, or a single checksum in which case the per file hashes are `xor`ed to get the target hash. (See [Recursive Structure](Recursive Structure) below)
- `fetch_method=download` a function to run to download the files.
- Function should take 2 parameters (remote_filepath, local_filepath), and can return anything
- Defaults to `Base.download` which invokes commandline download tools.
- Can take a vector of methods, being one for each file, or a single method, in which case that method is used to download all of them. (See [Recursive Structure](Recursive Structure) below)
- Very few people will need to override this, but potentially it can be used to deal with things like authorisation (let me know if you try)
- `post_fetch_method` a function to run after the files have downloaded
- Should take the local filepath as its first and only argument. Can return anything.
- Default is to do nothing.
- Can do what it wants from there, but most likely wants to extract the file into the data directory.
- towards this end DataDeps includes a command: `unpack` which will extract a compressed folder, deleting the original.
- It should be noted that `post_fetch_method` runs from within the data directory
- which means operations that just write to the current working directory (like `rm` or `mv` or ``run(`SOMECMD`)``) just work.
- You can call `pwd()` to get the data directory for your own functions. (Or `dirname(local_filepath)`)
- Can take a vector of methods, being one for each file, or a single method, in which case that same method is applied to all of the files. (See **Recursive Structure** in the README.md)
"""
function DataDep(name::String, message::String, remotepath, hash=nothing;
                 fetch_method=download, post_fetch_method=identity)
    # The struct stores the message in its last field (`extra_message`), so it
    # moves from second position here to last position in the field-order call.
    return DataDep(name, remotepath, hash, fetch_method, post_fetch_method, message)
end

0 comments on commit caef6a1

Please sign in to comment.