From ce19771d2b864f3287f4a60f168fdfd3bbcbd782 Mon Sep 17 00:00:00 2001 From: Alfredo Braunstein Date: Sun, 23 Dec 2018 00:11:22 +0100 Subject: [PATCH 1/5] simpler and faster sprand --- stdlib/SparseArrays/src/sparsematrix.jl | 54 ++++++++----------------- stdlib/SparseArrays/test/sparse.jl | 2 +- 2 files changed, 17 insertions(+), 39 deletions(-) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 7324b368b05a2..d8f45a8cdc22b 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1382,41 +1382,23 @@ function _sparse_findprevnz(m::SparseMatrixCSC, i::Integer) return LinearIndices(m)[m.rowval[prevhi-1], prevcol] end -function sprand_IJ(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) - ((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions")) - 0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]")) - N = n*m - - I, J = Vector{Int}(), Vector{Int}() # indices of nonzero elements - sizehint!(I, round(Int,N*density)) - sizehint!(J, round(Int,N*density)) - - # density of nonzero columns: - L = log1p(-density) - coldensity = -expm1(m*L) # = 1 - (1-density)^m - colsparsity = exp(m*L) # = 1 - coldensity - iL = 1/L - rows = Vector{Int}() - for j in randsubseq(r, 1:n, coldensity) - # To get the right statistics, we *must* have a nonempty column j - # even if p*m << 1. To do this, we use an approach similar to - # the one in randsubseq to compute the expected first nonzero row k, - # except given that at least one is nonzero (via Bayes' rule); - # carefully rearranged to avoid excessive roundoff errors. - k = ceil(log(colsparsity + rand(r)*coldensity) * iL) - ik = k < 1 ? 1 : k > m ? m : Int(k) # roundoff-error/underflow paranoia - randsubseq!(r, rows, 1:m-ik, density) - push!(rows, m-ik+1) - append!(I, rows) - nrows = length(rows) - Jlen = length(J) - resize!(J, Jlen+nrows) - @inbounds for i = Jlen+1:length(J) - J[i] = j +function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn) + (m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions")) + 0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]")) + j, colm = 1, 0 + rowval = randsubseq(r, 1:(m*n), density) + nnz = length(rowval) + colptr = Vector{Int}(undef, n + 1) + @inbounds for col = 1:n+1 + colptr[col] = j + while j <= nnz && rowval[j] <= colm + m + rowval[j] -= colm + j += 1 end + colm += m end - I, J + return SparseMatrixCSC(m, n, colptr, rowval, rfn(nnz)) end """ @@ -1447,9 +1429,7 @@ function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, N = m*n N == 0 && return spzeros(T,m,n) N == 1 && return rand(r) <= density ? sparse([1], [1], rfn(r,1)) : spzeros(T,1,1) - - I,J = sprand_IJ(r, m, n, density) - sparse_IJ_sorted!(I, J, rfn(r,length(I)), m, n, +) # it will never need to combine + _sprand(r,m,n,density,i->rfn(r,i)) end function sprand(m::Integer, n::Integer, density::AbstractFloat, @@ -1458,9 +1438,7 @@ function sprand(m::Integer, n::Integer, density::AbstractFloat, N = m*n N == 0 && return spzeros(T,m,n) N == 1 && return rand() <= density ? sparse([1], [1], rfn(1)) : spzeros(T,1,1) - - I,J = sprand_IJ(GLOBAL_RNG, m, n, density) - sparse_IJ_sorted!(I, J, rfn(length(I)), m, n, +) # it will never need to combine + _sprand(GLOBAL_RNG,m,n,density,rfn) end truebools(r::AbstractRNG, n::Integer) = fill(true, n) diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl index 7784c67baf078..ac231ea3379e8 100644 --- a/stdlib/SparseArrays/test/sparse.jl +++ b/stdlib/SparseArrays/test/sparse.jl @@ -1501,7 +1501,7 @@ end local A = guardseed(1234321) do triu(sprand(10, 10, 0.2)) end - @test SparseArrays.droptol!(A, 0.01).colptr == [1,1,1,2,2,3,4,6,6,7,9] + @test SparseArrays.droptol!(A, 0.01).colptr == [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13] @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[])) end From aa07fd75506ad3e8c09404539458b67e91a21995 Mon Sep 17 00:00:00 2001 From: Alfredo Braunstein Date: Sun, 23 Dec 2018 09:08:33 +0100 Subject: [PATCH 2/5] doctest fix --- stdlib/SparseArrays/src/sparsematrix.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index d8f45a8cdc22b..088b872d0f1ca 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1414,9 +1414,8 @@ argument specifies a random number generator, see [Random Numbers](@ref). # Examples ```jldoctest; setup = :(using Random; Random.seed!(1234)) julia> sprand(Bool, 2, 2, 0.5) -2×2 SparseMatrixCSC{Bool,Int64} with 2 stored entries: - [1, 1] = true - [2, 1] = true +2×2 SparseMatrixCSC{Bool,Int64} with 1 stored entry: + [2, 2] = true julia> sprand(Float64, 3, 0.75) 3-element SparseVector{Float64,Int64} with 1 stored entry: @@ -1465,8 +1464,8 @@ argument specifies a random number generator, see [Random Numbers](@ref). ```jldoctest; setup = :(using Random; Random.seed!(0)) julia> sprandn(2, 2, 0.75) 2×2 SparseMatrixCSC{Float64,Int64} with 2 stored entries: - [1, 1] = 0.586617 - [1, 2] = 0.297336 + [1, 2] = 0.586617 + [2, 2] = 0.297336 ``` """ sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) = sprand(r,m,n,density,randn,Float64) From 0b0c20f8478ff8879aacde2f7e766847210efba5 Mon Sep 17 00:00:00 2001 From: Alfredo Braunstein Date: Tue, 25 Dec 2018 16:40:23 +0100 Subject: [PATCH 3/5] shaving about half the time in _sprand --- stdlib/SparseArrays/src/sparsematrix.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index 088b872d0f1ca..a148e2297b7ec 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1392,10 +1392,10 @@ function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, colptr = Vector{Int}(undef, n + 1) @inbounds for col = 1:n+1 colptr[col] = j - while j <= nnz && rowval[j] <= colm + m - rowval[j] -= colm + while j <= nnz && (rowval[j] -= colm) <= m j += 1 end + j <= nnz && (rowval[j] += colm) colm += m end return SparseMatrixCSC(m, n, colptr, rowval, rfn(nnz)) From 0fec24dc76d10b0fee1673f740f232a507a1a257 Mon Sep 17 00:00:00 2001 From: Alfredo Braunstein Date: Thu, 27 Dec 2018 22:37:18 +0100 Subject: [PATCH 4/5] avoid problems with T<:Integer, T!=Int --- stdlib/SparseArrays/src/sparsematrix.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl index a148e2297b7ec..baac93ca15b2f 100644 --- a/stdlib/SparseArrays/src/sparsematrix.jl +++ b/stdlib/SparseArrays/src/sparsematrix.jl @@ -1384,6 +1384,7 @@ end function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn) + m, n = Int(m), Int(n) (m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions")) 0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]")) j, colm = 1, 0 From 5a7a4fefcd4c37475f91461c8eefb02cdaadeb4f Mon Sep 17 00:00:00 2001 From: Alfredo Braunstein Date: Thu, 3 Jan 2019 19:19:46 +0100 Subject: [PATCH 5/5] add tests clarifying the output distribution of sprand --- stdlib/SparseArrays/test/sparse.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl index ac231ea3379e8..dfb9aa9a284fa 100644 --- a/stdlib/SparseArrays/test/sparse.jl +++ b/stdlib/SparseArrays/test/sparse.jl @@ -2396,6 +2396,21 @@ end @test m2.module == SparseArrays end +@testset "sprand" begin + p=0.3; m=1000; n=2000; + for s in 1:10 + # build a (dense) random matrix with randsubset + rand + Random.seed!(s); + v = randsubseq(1:m*n,p); + x = zeros(m,n); + x[v] .= rand(length(v)); + # redo the same with sprand + Random.seed!(s); + a = sprand(m,n,p); + @test x == a + end +end + @testset "sprandn with type $T" for T in (Float64, Float32, Float16, ComplexF64, ComplexF32, ComplexF16) @test sprandn(T, 5, 5, 0.5) isa AbstractSparseMatrix{T} end