From 7a18e8a56b361c0fe162b3d0c11442989ed5d81b Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 08:43:45 -0500 Subject: [PATCH 1/6] Initial docs and clean up to register --- .github/workflows/CI.yml | 26 ++++++++++++++++++++++++++ docs/Project.toml | 5 +++++ docs/make.jl | 10 ++++++++++ docs/src/index.md | 39 +++++++++++++++++++++++++++++++++++++++ docs/src/reference.md | 6 ++++++ src/OneHotArrays.jl | 4 ++-- src/onehot.jl | 20 ++++++++++---------- 7 files changed, 98 insertions(+), 12 deletions(-) create mode 100644 docs/Project.toml create mode 100644 docs/make.jl create mode 100644 docs/src/index.md create mode 100644 docs/src/reference.md diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index f649c74..949f5e7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -49,3 +49,29 @@ jobs: - uses: codecov/codecov-action@v2 with: files: lcov.info + + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@v1 + with: + version: '1.6' + - run: | + julia --project=docs -e ' + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate()' + - run: | + julia --project=docs/ -e ' + using OneHotArrays + # using Pkg; Pkg.activate("docs") + using Documenter + using Documenter: doctest + DocMeta.setdocmeta!(OneHotArrays, :DocTestSetup, :(using OneHotArrays); recursive=true) + doctest(OneHotArrays)' + - run: julia --project=docs docs/make.jl + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 0000000..3a52a5d --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,5 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "0.27" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 0000000..3ecace4 --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,10 @@ +using Documenter, OneHotArrays + 
+DocMeta.setdocmeta!(OneHotArrays, :DocTestSetup, :(using OneHotArrays); recursive = true) +make(sitename = "OneHotArrays", doctest = false, + pages = ["Overview" => "index.md", + "Reference" => "reference.md"]) + +deploydocs(repo = "github.com/FluxML/OneHotArrays.jl.git", + target = "build", + push_preview = true) diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 0000000..627220f --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,39 @@ +# OneHotArrays.jl + +[![CI](https://github.com/FluxML/OneHotArrays.jl/actions/workflows/CI.yml/badge.svg)](https://github.com/FluxML/OneHotArrays.jl/actions/workflows/CI.yml) + +Memory efficient one-hot array encodings (primarily for use in machine-learning contexts like Flux.jl). + +## Usage + +One-hot arrays are boolean arrays where only a single element in the first dimension is `true` (i.e. "hot"). OneHotArrays.jl stores such arrays efficiently by encoding an N-dimensional array of booleans as an (N - 1)-dimensional array of integers. For example, the one-hot vector below only uses a single `UInt32` for storage. + +```julia +julia> β = onehot(:b, (:a, :b, :c)) +3-element OneHotVector(::UInt32) with eltype Bool: + ⋅ + 1 + ⋅ +``` + +As seen above, the one-hot encoding can be useful for representing labeled data. The label `:b` is encoded into a 3-element vector where the "hot" element indicates the label from the set `(:a, :b, :c)`. + +We can also encode a batch of one-hot vectors or reverse the encoding. 
+ +```julia +julia> oh = onehotbatch("abracadabra", 'a':'e', 'e') +5×11 OneHotMatrix(::Vector{UInt32}) with eltype Bool: + 1 ⋅ ⋅ 1 ⋅ 1 ⋅ 1 ⋅ ⋅ 1 + ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ + ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ + ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ + ⋅ ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ + +julia> onecold(β, (:a, :b, :c)) +:b + +julia> onecold([0.3, 0.2, 0.5], (:a, :b, :c)) +:c +``` + +In addition to functions for encoding and decoding data as one-hot, this package provides numerous "fast-paths" for linear algebraic operations with one-hot arrays. For example, multiplying a matrix by a one-hot vector triggers an indexing operation instead of a matrix multiplication. diff --git a/docs/src/reference.md b/docs/src/reference.md new file mode 100644 index 0000000..9423e5a --- /dev/null +++ b/docs/src/reference.md @@ -0,0 +1,6 @@ +# Reference + +```@autodocs +Modules = [OneHotArrays] +Order = [:function, :type] +``` diff --git a/src/OneHotArrays.jl b/src/OneHotArrays.jl index c14387e..ef6c886 100644 --- a/src/OneHotArrays.jl +++ b/src/OneHotArrays.jl @@ -7,8 +7,8 @@ using LinearAlgebra using MLUtils using NNlib -export onehot, onehotbatch, onecold, OneHotArray, - OneHotVector, OneHotMatrix, OneHotLike +export onehot, onehotbatch, onecold, + OneHotArray, OneHotVector, OneHotMatrix, OneHotLike include("array.jl") include("onehot.jl") diff --git a/src/onehot.jl b/src/onehot.jl index 4ff19e8..6231f3c 100644 --- a/src/onehot.jl +++ b/src/onehot.jl @@ -12,13 +12,13 @@ and [`onecold`](@ref) to reverse either of these, as well as to generalise `argm # Examples ```jldoctest -julia> β = Flux.onehot(:b, (:a, :b, :c)) +julia> β = onehot(:b, (:a, :b, :c)) 3-element OneHotVector(::UInt32) with eltype Bool: ⋅ 1 ⋅ -julia> αβγ = (Flux.onehot(0, 0:2), β, Flux.onehot(:z, [:a, :b, :c], :c)) # uses default +julia> αβγ = (onehot(0, 0:2), β, onehot(:z, [:a, :b, :c], :c)) # uses default (Bool[1, 0, 0], Bool[0, 1, 0], Bool[0, 0, 1]) julia> hcat(αβγ...) 
# preserves sparsity @@ -66,7 +66,7 @@ for `labels` will often speed up construction, certainly for less than 32 classe # Examples ```jldoctest -julia> oh = Flux.onehotbatch("abracadabra", 'a':'e', 'e') +julia> oh = onehotbatch("abracadabra", 'a':'e', 'e') 5×11 OneHotMatrix(::Vector{UInt32}) with eltype Bool: 1 ⋅ ⋅ 1 ⋅ 1 ⋅ 1 ⋅ ⋅ 1 ⋅ 1 ⋅ ⋅ ⋅ ⋅ ⋅ ⋅ 1 ⋅ ⋅ @@ -112,17 +112,17 @@ the same operation as `argmax(y, dims=1)` but sometimes a different return type. # Examples ```jldoctest -julia> Flux.onecold([false, true, false]) +julia> onecold([false, true, false]) 2 -julia> Flux.onecold([0.3, 0.2, 0.5], (:a, :b, :c)) +julia> onecold([0.3, 0.2, 0.5], (:a, :b, :c)) :c -julia> Flux.onecold([ 1 0 0 1 0 1 0 1 0 0 1 - 0 1 0 0 0 0 0 0 1 0 0 - 0 0 0 0 1 0 0 0 0 0 0 - 0 0 0 0 0 0 1 0 0 0 0 - 0 0 1 0 0 0 0 0 0 1 0 ], 'a':'e') |> String +julia> onecold([ 1 0 0 1 0 1 0 1 0 0 1 + 0 1 0 0 0 0 0 0 1 0 0 + 0 0 0 0 1 0 0 0 0 0 0 + 0 0 0 0 0 0 1 0 0 0 0 + 0 0 1 0 0 0 0 0 0 1 0 ], 'a':'e') |> String "abeacadabea" ``` """ From e7f3913324bc18722c55b887f4718e2b1028bebe Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 08:51:36 -0500 Subject: [PATCH 2/6] Add buildkite pipeline --- .buildkite/pipeline.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .buildkite/pipeline.yml diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 0000000..7be3853 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,37 @@ +steps: + - label: "GPU integration with julia v1.6" + plugins: + - JuliaCI/julia#v1: + # Drop default "registries" directory, so it is not persisted from execution to execution + # Taken from https://github.com/JuliaLang/julia/blob/v1.7.2/.buildkite/pipelines/main/platforms/package_linux.yml#L11-L12 + persist_depot_dirs: packages,artifacts,compiled + version: "1.6" + - JuliaCI/julia-test#v1: ~ + agents: + queue: "juliagpu" + cuda: "*" + timeout_in_minutes: 60 + + - label: "GPU integration with 
julia v1" + plugins: + - JuliaCI/julia#v1: + version: "1" + - JuliaCI/julia-test#v1: ~ + - JuliaCI/julia-coverage#v1: + codecov: true + agents: + queue: "juliagpu" + cuda: "*" + env: + JULIA_CUDA_USE_BINARYBUILDER: "true" + timeout_in_minutes: 60 + + # - label: "GPU nightly" + # plugins: + # - JuliaCI/julia#v1: + # version: "nightly" + # - JuliaCI/julia-test#v1: ~ + # agents: + # queue: "juliagpu" + # cuda: "*" + # timeout_in_minutes: 60 From bf3c51d8ae39deab50902b8d6950e05d1e6a3f53 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 08:52:56 -0500 Subject: [PATCH 3/6] Fix typo in `docs/make.jl` --- docs/make.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index 3ecace4..79336a2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,9 +1,9 @@ using Documenter, OneHotArrays DocMeta.setdocmeta!(OneHotArrays, :DocTestSetup, :(using OneHotArrays); recursive = true) -make(sitename = "OneHotArrays", doctest = false, - pages = ["Overview" => "index.md", - "Reference" => "reference.md"]) +makedocs(sitename = "OneHotArrays", doctest = false, + pages = ["Overview" => "index.md", + "Reference" => "reference.md"]) deploydocs(repo = "github.com/FluxML/OneHotArrays.jl.git", target = "build", From a5c4cd2e07f0caf1a7c7b4e52dcce9c8c02aff18 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 08:59:57 -0500 Subject: [PATCH 4/6] Unmark broken GPU tests --- test/gpu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gpu.jl b/test/gpu.jl index 91fa2a8..a799acb 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -6,7 +6,7 @@ cx = cu(x) @test cx isa CuArray - @test_broken onecold(cu([1.0, 2.0, 3.0])) == 3 # scalar indexing error? 
+ @test onecold(cu([1.0, 2.0, 3.0])) == 3 x = onehotbatch([1, 2, 3], 1:3) cx = cu(x) From 42726286ba5f9dcd72399cbb8e97f41349c49f93 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 11:00:47 -0500 Subject: [PATCH 5/6] Skip test that triggers scalar indexing with `JLArray` Co-authored-by: Michael Abbott <32575566+mcabbott@users.noreply.github.com> --- test/gpu.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/gpu.jl b/test/gpu.jl index a799acb..13c208c 100644 --- a/test/gpu.jl +++ b/test/gpu.jl @@ -6,7 +6,7 @@ cx = cu(x) @test cx isa CuArray - @test onecold(cu([1.0, 2.0, 3.0])) == 3 + @test_skip onecold(cu([1.0, 2.0, 3.0])) == 3 # passes with CuArray with Julia 1.6, but fails with JLArray x = onehotbatch([1, 2, 3], 1:3) cx = cu(x) From 22b98c0607aadecd4604e3732131931adbe42cd3 Mon Sep 17 00:00:00 2001 From: Kyle Daruwalla Date: Tue, 5 Apr 2022 12:23:29 -0500 Subject: [PATCH 6/6] Add codecov token to secrets --- .buildkite/pipeline.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 7be3853..6a1d44a 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -35,3 +35,6 @@ steps: # queue: "juliagpu" # cuda: "*" # timeout_in_minutes: 60 + +env: + SECRET_CODECOV_TOKEN: "fAV/xwuaV0l5oaIYSAXRQIor8h7yHdlrpLUZFwNVnchn7rDk9UZoz0oORG9vlKLc1GK2HhaPRAy+fTkJ3GM/8Y0phHh3ANK8f5UsGm2DUTNsnf6u9izgnwnoRTcsWu+vSO0fyYrxBvBCoJwljL+yZbDFz3oE16DP7HPIzxfQagm+o/kMEszVuoUXhuLXXH0LxT6pXl214qjqs04HfMRmKIIiup48NB6fBLdhGlQz64MdMNHBfgDa/fafB7eNvn0X6pEOxysoy6bDQLUhKelOXgcDx1UsTo34Yiqr+QeJPAeKcO//PWurwQhPoUoHfLad2da9DN4uQk4YQLqAlcIuAA==;U2FsdGVkX1+mRXF2c9soCXT7DYymY3msM+vrpaifiTp8xA+gMpbQ0G63WY3tJ+6V/fJcVnxYoKZVXbjcg8fl4Q=="