From ce19771d2b864f3287f4a60f168fdfd3bbcbd782 Mon Sep 17 00:00:00 2001
From: Alfredo Braunstein <alfredo.braunstein@polito.it>
Date: Sun, 23 Dec 2018 00:11:22 +0100
Subject: [PATCH 1/5] simpler and faster sprand

---
 stdlib/SparseArrays/src/sparsematrix.jl | 54 ++++++++-----------------
 stdlib/SparseArrays/test/sparse.jl      |  2 +-
 2 files changed, 17 insertions(+), 39 deletions(-)

diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index 7324b368b05a2..d8f45a8cdc22b 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -1382,41 +1382,23 @@ function _sparse_findprevnz(m::SparseMatrixCSC, i::Integer)
     return LinearIndices(m)[m.rowval[prevhi-1], prevcol]
 end
 
-function sprand_IJ(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat)
-    ((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions"))
-    0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
-    N = n*m
-
-    I, J = Vector{Int}(), Vector{Int}() # indices of nonzero elements
-    sizehint!(I, round(Int,N*density))
-    sizehint!(J, round(Int,N*density))
-
-    # density of nonzero columns:
-    L = log1p(-density)
-    coldensity = -expm1(m*L) # = 1 - (1-density)^m
-    colsparsity = exp(m*L) # = 1 - coldensity
-    iL = 1/L
 
-    rows = Vector{Int}()
-    for j in randsubseq(r, 1:n, coldensity)
-        # To get the right statistics, we *must* have a nonempty column j
-        # even if p*m << 1.   To do this, we use an approach similar to
-        # the one in randsubseq to compute the expected first nonzero row k,
-        # except given that at least one is nonzero (via Bayes' rule);
-        # carefully rearranged to avoid excessive roundoff errors.
-        k = ceil(log(colsparsity + rand(r)*coldensity) * iL)
-        ik = k < 1 ? 1 : k > m ? m : Int(k) # roundoff-error/underflow paranoia
-        randsubseq!(r, rows, 1:m-ik, density)
-        push!(rows, m-ik+1)
-        append!(I, rows)
-        nrows = length(rows)
-        Jlen = length(J)
-        resize!(J, Jlen+nrows)
-        @inbounds for i = Jlen+1:length(J)
-            J[i] = j
+function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn)
+    (m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions"))
+    0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
+    j, colm  = 1, 0
+    rowval = randsubseq(r, 1:(m*n), density)
+    nnz = length(rowval)
+    colptr = Vector{Int}(undef, n + 1)
+    @inbounds for col = 1:n+1
+        colptr[col] = j
+        while j <= nnz && rowval[j] <= colm + m
+            rowval[j] -= colm
+            j += 1
         end
+        colm += m
     end
-    I, J
+    return SparseMatrixCSC(m, n, colptr, rowval, rfn(nnz))
 end
 
 """
@@ -1447,9 +1429,7 @@ function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat,
     N = m*n
     N == 0 && return spzeros(T,m,n)
     N == 1 && return rand(r) <= density ? sparse([1], [1], rfn(r,1)) : spzeros(T,1,1)
-
-    I,J = sprand_IJ(r, m, n, density)
-    sparse_IJ_sorted!(I, J, rfn(r,length(I)), m, n, +)  # it will never need to combine
+    _sprand(r,m,n,density,i->rfn(r,i))
 end
 
 function sprand(m::Integer, n::Integer, density::AbstractFloat,
@@ -1458,9 +1438,7 @@ function sprand(m::Integer, n::Integer, density::AbstractFloat,
     N = m*n
     N == 0 && return spzeros(T,m,n)
     N == 1 && return rand() <= density ? sparse([1], [1], rfn(1)) : spzeros(T,1,1)
-
-    I,J = sprand_IJ(GLOBAL_RNG, m, n, density)
-    sparse_IJ_sorted!(I, J, rfn(length(I)), m, n, +)  # it will never need to combine
+    _sprand(GLOBAL_RNG,m,n,density,rfn)
 end
 
 truebools(r::AbstractRNG, n::Integer) = fill(true, n)
diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl
index 7784c67baf078..ac231ea3379e8 100644
--- a/stdlib/SparseArrays/test/sparse.jl
+++ b/stdlib/SparseArrays/test/sparse.jl
@@ -1501,7 +1501,7 @@ end
     local A = guardseed(1234321) do
         triu(sprand(10, 10, 0.2))
     end
-    @test SparseArrays.droptol!(A, 0.01).colptr == [1,1,1,2,2,3,4,6,6,7,9]
+    @test SparseArrays.droptol!(A, 0.01).colptr ==  [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13]
     @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[]))
 end
 

From aa07fd75506ad3e8c09404539458b67e91a21995 Mon Sep 17 00:00:00 2001
From: Alfredo Braunstein <alfredo.braunstein@polito.it>
Date: Sun, 23 Dec 2018 09:08:33 +0100
Subject: [PATCH 2/5] doctest fix

---
 stdlib/SparseArrays/src/sparsematrix.jl | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index d8f45a8cdc22b..088b872d0f1ca 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -1414,9 +1414,8 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest; setup = :(using Random; Random.seed!(1234))
 julia> sprand(Bool, 2, 2, 0.5)
-2×2 SparseMatrixCSC{Bool,Int64} with 2 stored entries:
-  [1, 1]  =  true
-  [2, 1]  =  true
+2×2 SparseMatrixCSC{Bool,Int64} with 1 stored entry:
+  [2, 2]  =  true
 
 julia> sprand(Float64, 3, 0.75)
 3-element SparseVector{Float64,Int64} with 1 stored entry:
@@ -1465,8 +1464,8 @@ argument specifies a random number generator, see [Random Numbers](@ref).
 ```jldoctest; setup = :(using Random; Random.seed!(0))
 julia> sprandn(2, 2, 0.75)
 2×2 SparseMatrixCSC{Float64,Int64} with 2 stored entries:
-  [1, 1]  =  0.586617
-  [1, 2]  =  0.297336
+  [1, 2]  =  0.586617
+  [2, 2]  =  0.297336
 ```
 """
 sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) = sprand(r,m,n,density,randn,Float64)

From 0b0c20f8478ff8879aacde2f7e766847210efba5 Mon Sep 17 00:00:00 2001
From: Alfredo Braunstein <alfredo.braunstein@polito.it>
Date: Tue, 25 Dec 2018 16:40:23 +0100
Subject: [PATCH 3/5] shaving about half the time in _sprand

---
 stdlib/SparseArrays/src/sparsematrix.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index 088b872d0f1ca..a148e2297b7ec 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -1392,10 +1392,10 @@ function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat,
     colptr = Vector{Int}(undef, n + 1)
     @inbounds for col = 1:n+1
         colptr[col] = j
-        while j <= nnz && rowval[j] <= colm + m
-            rowval[j] -= colm
+        while j <= nnz && (rowval[j] -= colm) <= m
             j += 1
         end
+        j <= nnz && (rowval[j] += colm)
         colm += m
     end
     return SparseMatrixCSC(m, n, colptr, rowval, rfn(nnz))

From 0fec24dc76d10b0fee1673f740f232a507a1a257 Mon Sep 17 00:00:00 2001
From: Alfredo Braunstein <alfredo.braunstein@polito.it>
Date: Thu, 27 Dec 2018 22:37:18 +0100
Subject: [PATCH 4/5] avoid problems with T<:Integer, T!=Int

---
 stdlib/SparseArrays/src/sparsematrix.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
index a148e2297b7ec..baac93ca15b2f 100644
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ b/stdlib/SparseArrays/src/sparsematrix.jl
@@ -1384,6 +1384,7 @@ end
 
 
 function _sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn)
+    m, n = Int(m), Int(n)
     (m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions"))
     0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
     j, colm  = 1, 0

From 5a7a4fefcd4c37475f91461c8eefb02cdaadeb4f Mon Sep 17 00:00:00 2001
From: Alfredo Braunstein <alfredo.braunstein@polito.it>
Date: Thu, 3 Jan 2019 19:19:46 +0100
Subject: [PATCH 5/5] add tests clarifying the output distribution of sprand

---
 stdlib/SparseArrays/test/sparse.jl | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl
index ac231ea3379e8..dfb9aa9a284fa 100644
--- a/stdlib/SparseArrays/test/sparse.jl
+++ b/stdlib/SparseArrays/test/sparse.jl
@@ -2396,6 +2396,21 @@ end
     @test m2.module == SparseArrays
 end
 
+@testset "sprand" begin
+    p=0.3; m=1000; n=2000;
+    for s in 1:10
+        # build a (dense) random matrix with randsubset + rand
+        Random.seed!(s);
+        v = randsubseq(1:m*n,p);
+        x = zeros(m,n);
+        x[v] .= rand(length(v));
+        # redo the same with sprand
+        Random.seed!(s);
+        a = sprand(m,n,p);
+        @test x == a
+    end
+end
+
 @testset "sprandn with type $T" for T in (Float64, Float32, Float16, ComplexF64, ComplexF32, ComplexF16)
     @test sprandn(T, 5, 5, 0.5) isa AbstractSparseMatrix{T}
 end