diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 0000000..6a1d44a --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,40 @@ +steps: + - label: "GPU integration with julia v1.6" + plugins: + - JuliaCI/julia#v1: + # Drop default "registries" directory, so it is not persisted from execution to execution + # Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12 + persist_depot_dirs: packages,artifacts,compiled + version: "1.6" + - JuliaCI/julia-test#v1: ~ + agents: + queue: "juliagpu" + cuda: "*" + timeout_in_minutes: 60 + + - label: "GPU integration with julia v1" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-test#v1: ~ + - JuliaCI/julia-coverage#v1: + codecov: true + agents: + queue: "juliagpu" + cuda: "*" + env: + JULIA_CUDA_USE_BINARYBUILDER: "true" + timeout_in_minutes: 60 + + # - label: "GPU nightly" + # plugins: + # - JuliaCI/julia#v1: + # version: "nightly" + # - JuliaCI/julia-test#v1: ~ + # agents: + # queue: "juliagpu" + # cuda: "*" + # timeout_in_minutes: 60 + +env: + SECRET_CODECOV_TOKEN: "fAV/xwuaV0l5oaIYSAXRQIor8h7yHdlrpLUZFwNVnchn7rDk9UZoz0oORG9vlKLc1GK2HhaPRAy+fTkJ3GM/8Y0phHh3ANK8f5UsGm2DUTNsnf6u9izgnwnoRTcsWu+vSO0fyYrxBvBCoJwljL+yZbDFz3oE16DP7HPIzxfQagm+o/kMEszVuoUXhuLXXH0LxT6pXl214qjqs04HfMRmKIIiup48NB6fBLdhGlQz64MdMNHBfgDa/fafB7eNvn0X6pEOxysoy6bDQLUhKelOXgcDx1UsTo34Yiqr+QeJPAeKcO//PWurwQhPoUoHfLad2da9DN4uQk4YQLqAlcIuAA==;U2FsdGVkX1+mRXF2c9soCXT7DYymY3msM+vrpaifiTp8xA+gMpbQ0G63WY3tJ+6V/fJcVnxYoKZVXbjcg8fl4Q==" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f649c74..949f5e7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -49,3 +49,29 @@ jobs: - uses: codecov/codecov-action@v2 with: files: lcov.info + + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1.6' + - run: | + julia 
--project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - run: | + julia --project=docs/ -e ' + using OneHotArrays + # using Pkg; Pkg.activate("docs") + using Documenter + using Documenter: doctest + DocMeta.setdocmeta!(OneHotArrays, :DocTestSetup, :(using OneHotArrays); recursive=true) + doctest(OneHotArrays)' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..3a52a5d --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,5 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "0.27" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..79336a2 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,10 @@ +using Documenter, OneHotArrays + +DocMeta.setdocmeta!(OneHotArrays, :DocTestSetup, :(using OneHotArrays); recursive = true) +makedocs(sitename = "OneHotArrays", doctest = false, + pages = ["Overview" => "index.md", + "Reference" => "reference.md"]) + +deploydocs(repo = "github.com/FluxML/OneHotArrays.jl.git", + target = "build", + push_preview = true) diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..627220f --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,39 @@ +# OneHotArrays.jl + +[![CI](https://github.com/FluxML/OneHotArrays.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/FluxML/OneHotArrays.jl/actions/workflows/CI.yml) + +Memory-efficient one-hot array encodings (primarily for use in machine-learning contexts like Flux.jl). + +## Usage + +One-hot arrays are boolean arrays where only a single element in the first dimension is `true` (i.e. "hot"). OneHotArrays.jl stores such arrays efficiently by encoding an N-dimensional array of booleans as an (N - 1)-dimensional array of integers. For example, the one-hot vector below only uses a single `UInt32` for storage. 
+ +```julia +julia> β = onehot(:b, (:a, :b, :c)) +3-element OneHotVector(::UInt32) with eltype Bool: + ⋅ + 1 + ⋅ +``` + +As seen above, the one-hot encoding can be useful for representing labeled data. The label `:b` is encoded into a 3-element vector where the "hot" element indicates the label from the set `(:a, :b, :c)`. + +We can also encode a batch of one-hot vectors or reverse the encoding. + +```julia +julia> oh = onehotbatch("abracadabra", 'a':'e', 'e') +5×11 OneHotMatrix(::Vector{UInt32}) with eltype Bool: + 1 ⋅ ⋅ 1 ⋅ 1 ⋅ 1 ⋅ ⋅ 1 + ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ + ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ + ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ + ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ + +julia> onecold(β, (:a, :b, :c)) +:b + +julia> onecold([0.3, 0.2, 0.5], (:a, :b, :c)) +:c +``` + +In addition to functions for encoding and decoding data as one-hot, this package provides numerous "fast-paths" for linear algebraic operations with one-hot arrays. For example, multiplying a matrix by a one-hot vector triggers an indexing operation instead of a matrix multiplication. 
diff --git a/docs/src/reference.md b/docs/src/reference.md new file mode 100644 index 0000000..9423e5a --- /dev/null +++ b/docs/src/reference.md @@ -0,0 +1,6 @@ +# Reference + +```@autodocs +Modules = [OneHotArrays] +Order = [:function, :type] +``` diff --git a/src/OneHotArrays.jl b/src/OneHotArrays.jl index c14387e..ef6c886 100644 --- a/src/OneHotArrays.jl +++ b/src/OneHotArrays.jl @@ -7,8 +7,8 @@ using LinearAlgebra using MLUtils using NNlib -export onehot, onehotbatch, onecold, OneHotArray, - OneHotVector, OneHotMatrix, OneHotLike +export onehot, onehotbatch, onecold, + OneHotArray, OneHotVector, OneHotMatrix, OneHotLike include("array.jl") include("onehot.jl") diff --git a/src/onehot.jl b/src/onehot.jl index 4ff19e8..6231f3c 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -12,13 +12,13 @@ and [`onecold`](@ref) to reverse either of these, as well as to generalise `argm # Examples ```jldoctest -julia> β = Flux.onehot(:b, (:a, :b, :c)) +julia> β = onehot(:b, (:a, :b, :c)) 3-element OneHotVector(::UInt32) with eltype Bool: ⋅ 1 ⋅ -julia> αβγ = (Flux.onehot(0, 0:2), β, Flux.onehot(:z, [:a, :b, :c], :c)) # uses default +julia> αβγ = (onehot(0, 0:2), β, onehot(:z, [:a, :b, :c], :c)) # uses default (Bool[1, 0, 0], Bool[0, 1, 0], Bool[0, 0, 1]) julia> hcat(αβγ...) # preserves sparsity @@ -66,7 +66,7 @@ for `labels` will often speed up construction, certainly for less than 32 classe # Examples ```jldoctest -julia> oh = Flux.onehotbatch("abracadabra", 'a':'e', 'e') +julia> oh = onehotbatch("abracadabra", 'a':'e', 'e') 5×11 OneHotMatrix(::Vector{UInt32}) with eltype Bool: 1 ⋅ ⋅ 1 ⋅ 1 ⋅ 1 ⋅ ⋅ 1 ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ @@ -112,17 +112,17 @@ the same operation as `argmax(y, dims=1)` but sometimes a different return type. 
# Examples ```jldoctest -julia> Flux.onecold([false, true, false]) +julia> onecold([false, true, false]) 2 -julia> Flux.onecold([0.3, 0.2, 0.5], (:a, :b, :c)) +julia> onecold([0.3, 0.2, 0.5], (:a, :b, :c)) :c -julia> Flux.onecold([ 1 0 0 1 0 1 0 1 0 0 1 - 0 1 0 0 0 0 0 0 1 0 0 - 0 0 0 0 1 0 0 0 0 0 0 - 0 0 0 0 0 0 1 0 0 0 0 - 0 0 1 0 0 0 0 0 0 1 0 ], 'a':'e') |> String +julia> onecold([ 1 0 0 1 0 1 0 1 0 0 1 + 0 1 0 0 0 0 0 0 1 0 0 + 0 0 0 0 1 0 0 0 0 0 0 + 0 0 0 0 0 0 1 0 0 0 0 + 0 0 1 0 0 0 0 0 0 1 0 ], 'a':'e') |> String "abeacadabea" ``` """ diff --git a/test/gpu.jl b/test/gpu.jl index 91fa2a8..13c208c 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -6,7 +6,7 @@ cx = cu(x) @test cx isa CuArray - @test_broken onecold(cu([1.0, 2.0, 3.0])) == 3 # scalar indexing error? + @test_skip onecold(cu([1.0, 2.0, 3.0])) == 3 # passes with CuArray with Julia 1.6, but fails with JLArray x = onehotbatch([1, 2, 3], 1:3) cx = cu(x)