Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hard deps and CSV->DelimitedFiles #78

Merged
merged 7 commits into from
Nov 6, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,33 @@ version = "0.7.3"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
CategoricalArrays = "<0.5.3, 0.7"
Distributions = "0.21.3"
Missings = "0.4.3"
OrderedCollections = "1.1"
Requires = "^0.5.2"
ScientificTypes = "0.2.0"
StatsBase = "0.32"
Tables = "<0.1.19, 0.2"
CategoricalArrays = "^0.7"
LossFunctions = "^0.5"
OrderedCollections = "^1.1"
ScientificTypes = "^0.2"
StatsBase = "^0.32"
Tables = "^0.2"
julia = "1"

[extras]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LossFunctions = "30fc2ffe-d236-52d8-8643-a9d8f7c094a7"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TypedTables = "9d95f2ec-7b3d-5a63-8d20-e2491e220bb9"

[targets]
test = ["CSV", "DataFrames", "Distances", "LossFunctions", "Test", "TypedTables"]
test = ["CSV", "DataFrames", "Distances", "Test", "TypedTables"]
30 changes: 17 additions & 13 deletions src/MLJBase.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Users of this module should first read the document
# https://alan-turing-institute.github.io/MLJ.jl/dev/adding_models_for_general_use/
module MLJBase
module MLJBase

export MLJType, Model, Supervised, Unsupervised
export Deterministic, Probabilistic, Interval
Expand Down Expand Up @@ -56,26 +56,25 @@ export std
import Base.==
import Base: @__doc__

using Tables
using Tables, DelimitedFiles
using OrderedCollections # already a dependency of StatsBase
import Distributions
import Distributions: pdf, mode
using CategoricalArrays
using OrderedCollections
import CategoricalArrays
using ScientificTypes
import ScientificTypes: trait

# to be extended:
import StatsBase: fit, predict, fit!
import Missings.levels

import Distributions
import Distributions: pdf, mode

using ScientificTypes

# from Standard Library:

using Statistics
using Random
using InteractiveUtils

using LossFunctions

## CONSTANTS

Expand All @@ -88,13 +87,11 @@ const DEFAULT_SHOW_DEPTH = 0

include("utilities.jl")


## BASE TYPES

abstract type MLJType end
include("equality.jl") # equality for MLJType objects


## ABSTRACT MODEL TYPES

# for storing hyperparameters:
Expand Down Expand Up @@ -178,7 +175,6 @@ function best end
# message):
clean!(model::Model) = ""


## TRAITS

"""
Expand Down Expand Up @@ -218,6 +214,14 @@ include("mlj_model_macro.jl")
include("metadata_utilities.jl")

# __init__() function:
include("init.jl")
# include("init.jl")

ScientificTypes.TRAIT_FUNCTION_GIVEN_NAME[:supervised_model] =
x-> x isa Supervised
ScientificTypes.TRAIT_FUNCTION_GIVEN_NAME[:unsupervised_model] =
x-> x isa Unsupervised
ScientificTypes.TRAIT_FUNCTION_GIVEN_NAME[:measure] = is_measure

include("loss_functions_interface.jl")

end # module
21 changes: 9 additions & 12 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,15 @@ function partition(rows::AbstractVector{Int}, fractions...; shuffle::Bool=false,
return tuple(rowss...)
end


"""
t1, t2, ..., tk = unpack(table, t1, t2, ..., tk; wrap_singles=false)

Split any Tables.jl compatible `table` into smaller tables (or
vectors) `t1, t2, ..., tk` by making selections *without replacement*
from the column names defined by the tests `t1`, `t2`, ...,
`tk`. A *test* is any object `t` such that `t(name)` is `true`
or `false` for each column `name::Symbol` of `table`.
or `false` for each column `name::Symbol` of `table`.

Whenever a returned table contains a single column, it is converted to
a vector unless `wrap_singles=true`.
Expand All @@ -66,7 +67,7 @@ semicolon):
```
julia> table = DataFrame(x=[1,2], y=['a', 'b'], z=[10.0, 20.0], w=[:A, :B])
julia> Z, XY = unpack(table, ==(:z), !=(:w);
:x=>Continuous, :y=>Multiclass)
:x=>Continuous, :y=>Multiclass)
julia> XY
2×2 DataFrame
│ Row │ x │ y │
Expand All @@ -80,7 +81,6 @@ julia> Z
10.0
20.0
```

"""
function unpack(X, tests...; wrap_singles=false, pairs...)

Expand All @@ -106,13 +106,13 @@ function unpack(X, tests...; wrap_singles=false, pairs...)
counter += 1
end
return Tuple(unpacked)

end


## DEALING WITH CATEGORICAL ELEMENTS

CategoricalElement{U} = Union{CategoricalValue{<:Any,U},CategoricalString{U}}
const CategoricalElement{U} = Union{CategoricalValue{<:Any,U},CategoricalString{U}}


"""
classes(x)
Expand Down Expand Up @@ -178,13 +178,14 @@ int(A::AbstractArray) = broadcast(int, A)

# get the integer representation of a level given pool (private
# method):
int(pool::CategoricalPool, level) = pool.order[pool.invindex[level]]
int(pool::CategoricalPool, level) = pool.order[pool.invindex[level]]

# Pairs a categorical pool with a permutation derived from that pool's
# `order` vector. Instances are built by `decoder(element)`, which passes
# `element.pool` and `sortperm(element.pool.order)`.
struct CategoricalDecoder{T,R} # <: MLJType
# the pool taken from the categorical element the decoder was built from
pool::CategoricalPool{T,R}
# `sortperm(pool.order)`; presumably used to map the integers produced by
# `int` back to positions in the pool -- TODO confirm against the call method
invorder::Vector{Int}
end


"""
d = decoder(x)

Expand All @@ -208,7 +209,6 @@ integer arrays, in which case `d` is broadcast over all elements.
*Warning:* It is *not* true that `int(d(u)) == u` always holds.

See also: [`int`](@ref), [`classes`](@ref).

"""
function decoder(element::CategoricalElement)
    # The decoder keeps the element's pool together with the permutation
    # that sorts the pool's `order` vector.
    pool = element.pool
    return CategoricalDecoder(pool, sortperm(pool.order))
end
Expand All @@ -235,7 +235,6 @@ Convert a Tables.jl compatible table source `X` into a `Matrix`; or,
if `X` is an `AbstractMatrix`, return `X`. Optimized for column-based
sources. Rows of the table or input matrix correspond to rows of the
output, unless `transpose=true`.

"""
matrix(X; kwargs...) = matrix(Val(ScientificTypes.trait(X)), X; kwargs...)

# Fallback for the `:other` trait when no more specific method applies.
# Throw an `ArgumentError` *instance*: `throw(ArgumentError)` would throw the
# exception type itself, which neither matches `e isa ArgumentError` in a
# `catch` block nor carries a message for the user.
matrix(::Val{:other}, X; kwargs...) =
    throw(ArgumentError("`matrix` is not implemented for objects of type $(typeof(X)). "))
Expand Down Expand Up @@ -283,7 +282,6 @@ If a `prototype` is specified, then the matrix is materialized as a
table of the preferred sink type of `prototype`, rather than
wrapped. Note that if `prototype` is *not* specified, then
`MLJ.matrix(MLJ.table(A))` is essentially a non-operation.

"""
function table(cols::NamedTuple; prototype=NamedTuple())
Tables.istable(prototype) || error("`prototype` is not a table. ")
Expand Down Expand Up @@ -429,11 +427,11 @@ nrows(::Val{:other}, v::AbstractVector) = length(v)

## ACCESSORS FOR ABSTRACT MATRICES

selectrows(::Val{:other}, A::AbstractMatrix, r) = A[r, :]
selectrows(::Val{:other}, A::AbstractMatrix, r) = A[r, :]
selectrows(::Val{:other}, A::CategoricalMatrix, r) = @inbounds A[r, :]

# single row selection must return a matrix!
selectrows(::Val{:other}, A::AbstractMatrix, r::Integer) = A[r:r, :]
selectrows(::Val{:other}, A::AbstractMatrix, r::Integer) = A[r:r, :]
selectrows(::Val{:other}, A::CategoricalMatrix, r::Integer) =
@inbounds A[r:r, :]

Expand Down Expand Up @@ -462,4 +460,3 @@ nrows(::Val{:other}, A::AbstractMatrix) = size(A, 1)
# select(::Val{:sparse}, X, r::Integer, c::AbstractVector{Symbol}) = X[r,sort(c)]
# select(::Val{:sparse}, X, r::Integer, ::Colon) = X[r,:]
# select(::Val{:sparse}, X, r, c) = X[r,sort(c)]

Loading