Skip to content

Commit

Permalink
more 0.7 updates
Browse files Browse the repository at this point in the history
  • Loading branch information
KristofferC committed Jun 27, 2018
1 parent 46377ad commit bfbe593
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 64 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@
*.ipynb_checkpoints/
benchmarks/*.jld
benchmarks/*.md
Manifest.toml

12 changes: 12 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
name = "NearestNeighbors"
uuid = "b8a86587-4115-5ab1-83bc-aa920d37bbce"
version = "0.4.0"

[deps]
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"

[targets.test.deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
14 changes: 1 addition & 13 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ By default, the trees store a copy of the `data` provided during construction. T
`DataFreeTree` can be used to strip a constructed tree of its data field and re-link it with that data at a later stage. An example of using a large on-disk data set looks like this:

```jl
using Mmap
ndim = 2; ndata = 10_000_000_000
data = Mmap.mmap(datafilename, Matrix{Float32}, (ndim, ndata))
data[:] = rand(Float32, ndim, ndata) # create example data
Expand All @@ -138,19 +139,6 @@ tree = injectdata(dftree, data) # yields a KDTree
knn(tree, data[:,1], 3) # perform operations as usual
```

In case you want to exploit the reordering feature, which can improve access times by placing data items close together in memory / on disk when they are close together according to the metric used, you can pass a custom `reorderbuffer`. This can be either in-memory or mmapped, as in the following example:

```jl
reorderbuffer = Mmap.mmap(reorderedfilename, Matrix{Float32}, (ndim, ndata))
dftree = DataFreeTree(KDTree, data, reorderbuffer = reorderbuffer)
# all future operations are independent of 'data'
tree = injectdata(dftree, reorderbuffer)
```

## Debugging

There is some basic debugging/statistics functionality implemented. It is activated by setting the
`DEBUG` variable to `true` in the `NearestNeighbors.jl` file. For the debugging options, please see the `debugging.jl` file. Pull requests to enhance this are welcome.

## Author

Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmarkdatafreetree.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using NearestNeighbors
using Benchmarks
using Mmap

runtimes = []
runtimesreordered = []
Expand Down
2 changes: 0 additions & 2 deletions src/NearestNeighbors.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ __precompile__()

module NearestNeighbors

using Compat

using Distances
import Distances: Metric, result_type, eval_reduce, eval_end, eval_op, eval_start, evaluate

Expand Down
18 changes: 8 additions & 10 deletions src/ball_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ function BallTree(data::Vector{V},
leafsize::Int = 10,
reorder::Bool = true,
storedata::Bool = true,
reorderbuffer::Vector{V} = Vector{V}(),
indicesfor::Symbol = :data) where {V <: AbstractArray, M <: Metric}
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: Metric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

tree_data = TreeData(data, leafsize)
Expand All @@ -45,12 +44,12 @@ function BallTree(data::Vector{V},
indices = collect(1:n_p)

# Bottom-up creation of hyper spheres, so spheres are needed even for leafs
@compat hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(uninitialized, tree_data.n_internal_nodes + tree_data.n_leafs)
hyper_spheres = Vector{HyperSphere{length(V),eltype(V)}}(undef, tree_data.n_internal_nodes + tree_data.n_leafs)

if reorder
@compat indices_reordered = Vector{Int}(uninitialized, n_p)
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
@compat data_reordered = Vector{V}(uninitialized, n_p)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
Expand All @@ -68,19 +67,18 @@ function BallTree(data::Vector{V},

if reorder
data = data_reordered
indices = indicesfor == :data ? indices_reordered : collect(1:n_p)
indices = indices_reordered
end

BallTree(storedata ? data : similar(data, 0), hyper_spheres, indices, metric, tree_data, reorder)
end

@compat function BallTree(data::Matrix{T},
function BallTree(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0),
indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: Metric}
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: Metric}
dim = size(data, 1)
npoints = size(data, 2)
points = reinterpret_or_copy(T, data, Val(dim))
Expand All @@ -90,7 +88,7 @@ end
reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim))
end
BallTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points, indicesfor = indicesfor)
reorderbuffer = reorderbuffer_points)
end

# Recursive function to build the tree.
Expand Down
10 changes: 2 additions & 8 deletions src/datafreetree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,13 @@ function get_points_dim(data)
end

"""
DataFreeTree(treetype, data[, reorderbuffer = similar(data), indicesfor = :data, kargs...]) -> datafreetree
DataFreeTree(treetype, data[, reorderbuffer = similar(data), kwargs...]) -> datafreetree
Creates a `DataFreeTree` which wraps a `KDTree` or `BallTree`. Keyword arguments are passed
to their respective constructors.
The `KDTree` or `BallTree` will be stored without a reference to the underlying data. `injectdata`
has to be used to re-link them to a data array before use.
By default the `reorder` feature of `KDTree`/`BallTree` is turned off. In case a `reorderbuffer`
is provided, reordering is performed and the contents of `reorderbuffer` have to be later provided to
`injectdata`.
`indicesfor` controls whether the indices returned by the query functions should refer to `data` or the `reorderbuffer`. Valid values are `:data` and `:reordered`.
"""
function DataFreeTree(::Type{T}, data, args...; reorderbuffer = data[:, 1:0], kargs...) where {T <: NNTree}
tree = T(data, args...; storedata = false, reorderbuffer = reorderbuffer, kargs...)
Expand Down Expand Up @@ -69,6 +63,6 @@ function injectdata(datafreetree::DataFreeTree, data::Vector{V}, new_hash::UInt6
end

typ = typeof(datafreetree.tree)
fields = map(x -> getfield(datafreetree.tree, x), fieldnames(datafreetree.tree))[2:end]
fields = map(x -> getfield(datafreetree.tree, x), fieldnames(typeof(datafreetree.tree)))[2:end]
typ(data, fields...)
end
4 changes: 2 additions & 2 deletions src/hyperrectangles.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ end
function compute_bbox(data::Vector{V}) where {V <: AbstractVector}
T = eltype(V)
n_dim = length(V)
@compat maxes = Vector{T}(uninitialized, n_dim)
@compat mins = Vector{T}(uninitialized, n_dim)
maxes = Vector{T}(undef, n_dim)
mins = Vector{T}(undef, n_dim)
@inbounds for j in 1:length(V)
dim_max = typemin(T)
dim_min = typemax(T)
Expand Down
2 changes: 1 addition & 1 deletion src/hyperspheres.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function create_bsphere(data::Vector{V}, metric::Metric, indices::Vector{Int}, l
ab.center[j] += data[indices[i]][j]
end
end
scale!(ab.center, 1 / n_points)
ab.center .*= 1 / n_points

# Then find r
r = zero(get_T(eltype(V)))
Expand Down
18 changes: 8 additions & 10 deletions src/kd_tree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,20 @@ function KDTree(data::Vector{V},
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Vector{V} = Vector{V}(),
indicesfor::Symbol = :data) where {V <: AbstractArray, M <: MinkowskiMetric}
reorderbuffer::Vector{V} = Vector{V}()) where {V <: AbstractArray, M <: MinkowskiMetric}
reorder = !isempty(reorderbuffer) || (storedata ? reorder : false)

tree_data = TreeData(data, leafsize)
n_d = length(V)
n_p = length(data)

indices = collect(1:n_p)
@compat nodes = Vector{KDNode{eltype(V)}}(uninitialized, tree_data.n_internal_nodes)
nodes = Vector{KDNode{eltype(V)}}(undef, tree_data.n_internal_nodes)

if reorder
@compat indices_reordered = Vector{Int}(uninitialized, n_p)
indices_reordered = Vector{Int}(undef, n_p)
if isempty(reorderbuffer)
@compat data_reordered = Vector{V}(uninitialized, n_p)
data_reordered = Vector{V}(undef, n_p)
else
data_reordered = reorderbuffer
end
Expand All @@ -61,19 +60,18 @@ function KDTree(data::Vector{V},
1, length(data), tree_data, reorder)
if reorder
data = data_reordered
indices = indicesfor == :data ? indices_reordered : collect(1:n_p)
indices = indices_reordered
end

KDTree(storedata ? data : similar(data, 0), hyper_rec, indices, metric, nodes, tree_data, reorder)
end

@compat function KDTree(data::Matrix{T},
function KDTree(data::Matrix{T},
metric::M = Euclidean();
leafsize::Int = 10,
storedata::Bool = true,
reorder::Bool = true,
reorderbuffer::Matrix{T} = Matrix{T}(uninitialized, 0, 0),
indicesfor::Symbol = :data) where {T <: AbstractFloat, M <: MinkowskiMetric}
reorderbuffer::Matrix{T} = Matrix{T}(undef, 0, 0)) where {T <: AbstractFloat, M <: MinkowskiMetric}
dim = size(data, 1)
npoints = size(data, 2)
points = reinterpret_or_copy(T, data, Val(dim))
Expand All @@ -83,7 +81,7 @@ end
reorderbuffer_points = reinterpret_or_copy(T, reorderbuffer, Val(dim))
end
KDTree(points, metric, leafsize = leafsize, storedata = storedata, reorder = reorder,
reorderbuffer = reorderbuffer_points, indicesfor = indicesfor)
reorderbuffer = reorderbuffer_points)
end

function build_KDTree(index::Int,
Expand Down
8 changes: 4 additions & 4 deletions src/knn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ function knn(tree::NNTree{V}, points::Vector{T}, k::Int, sortres=false, skip::Fu
check_input(tree, points)
check_k(tree, k)
n_points = length(points)
@compat dists = [Vector{get_T(eltype(V))}(uninitialized, k) for _ in 1:n_points]
@compat idxs = [Vector{Int}(uninitialized, k) for _ in 1:n_points]
dists = [Vector{get_T(eltype(V))}(undef, k) for _ in 1:n_points]
idxs = [Vector{Int}(undef, k) for _ in 1:n_points]
for i in 1:n_points
knn_point!(tree, points[i], sortres, dists[i], idxs[i], skip)
end
Expand All @@ -38,8 +38,8 @@ end

function knn(tree::NNTree{V}, point::AbstractVector{T}, k::Int, sortres=false, skip::Function=always_false) where {V, T <: Number}
check_k(tree, k)
@compat idx = Vector{Int}(uninitialized, k)
@compat dist = Vector{get_T(eltype(V))}(uninitialized, k)
idx = Vector{Int}(undef, k)
dist = Vector{get_T(eltype(V))}(undef, k)
knn_point!(tree, point, sortres, dist, idx, skip)
return idx, dist
end
Expand Down
2 changes: 1 addition & 1 deletion src/tree_data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ struct TreeData
end


function TreeData{V}(data::Vector{V}, leafsize)
function TreeData(data::Vector{V}, leafsize) where V
n_dim, n_p = length(V), length(data)

# If number of points is zero
Expand Down
3 changes: 2 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
using NearestNeighbors
using StaticArrays

using Base.Test
using Test
using LinearAlgebra

import Distances: Metric, evaluate
struct CustomMetric1 <: Metric end
Expand Down
15 changes: 3 additions & 12 deletions test/test_datafreetree.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using Mmap

@testset "datafreetree" begin
data = rand(2,100)
data2 = rand(2,100)
Expand All @@ -7,26 +9,15 @@
@test_throws DimensionMismatch injectdata(t, data3)
for typ in [KDTree, BallTree]
dfilename = tempname()
rfilename = tempname()
d = 2
n = 100
data = Mmap.mmap(dfilename, Matrix{Float32}, (d, n))
data[:] = rand(Float32, d, n)
reorderbuffer = Mmap.mmap(rfilename, Matrix{Float32}, (d, n))
t = injectdata(DataFreeTree(typ, data), data)
tr = injectdata(DataFreeTree(typ, data, reorderbuffer = reorderbuffer), reorderbuffer)
tr = typ(data)
for i = 1:n
@test knn(t, data[:,i], 3) == knn(tr, data[:,i], 3)
end
rm(dfilename)
rm(rfilename)
end

data = rand(2,1000)
buf = zeros(data)
for typ in [KDTree, BallTree]
t = injectdata(DataFreeTree(typ, data, indicesfor = :data), data)
t2 = injectdata(DataFreeTree(typ, data, reorderbuffer = buf, indicesfor = :reordered), buf)
@test data[:,knn(t, data[:,1], 3)[1]] == buf[:,knn(t2, data[:,1], 3)[1]]
end
end

0 comments on commit bfbe593

Please sign in to comment.