Merge branch 'JuliaStats:master' into type_generic_normal

JuliaStats · Jun 9, 2023 · 7443d7b · 7443d7b
2 parents debbf4f + 2dee35e
commit 7443d7b
Show file tree

Hide file tree

Showing 179 changed files with 5,002 additions and 981 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+  # Maintain dependencies for GitHub Actions
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -32,12 +32,12 @@ jobs:
         arch:
           - x64
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v1
+      - uses: actions/cache@v3
         env:
           cache-name: cache-artifacts
         with:
@@ -55,14 +55,14 @@ jobs:
             Pkg.instantiate()'
       - run: julia --project=perf perf/samplers.jl
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v1
+      - uses: codecov/codecov-action@v3
         with:
-          file: lcov.info
+          files: lcov.info
   docs:
     name: Documentation
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: julia-actions/setup-julia@v1
         with:
           version: '1'

diff --git a/.github/workflows/DocPreviewCleanup.yml b/.github/workflows/DocPreviewCleanup.yml
@@ -9,7 +9,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout gh-pages branch
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           ref: gh-pages
       - name: Delete preview and history + push changes

diff --git a/.github/workflows/IntegrationTest.yml b/.github/workflows/IntegrationTest.yml
@@ -31,14 +31,14 @@ jobs:
           #- {user: TuringLang, repo: DistributionsAD.jl, group: ForwardDiff} takes > 1 hour
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: julia-actions/setup-julia@v1
         with:
           version: 1
           arch: x64
       - uses: julia-actions/julia-buildpkg@latest
       - name: Clone Downstream
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
         with:
           repository: ${{ matrix.package.user }}/${{ matrix.package.repo }}
           path: downstream

diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Distributions"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
 authors = ["JuliaStats"]
-version = "0.25.62"
+version = "0.25.95"
 
 [deps]
 ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -15,24 +15,32 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
 StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
 ChainRulesCore = "1"
 DensityInterface = "0.4"
-FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13"
+FillArrays = "0.9, 0.10, 0.11, 0.12, 0.13, 1"
 PDMats = "0.10, 0.11"
 QuadGK = "2"
 SpecialFunctions = "1.2, 2"
-StatsBase = "0.32, 0.33"
+StatsAPI = "1.6"
+StatsBase = "0.32, 0.33, 0.34"
 StatsFuns = "0.9.15, 1"
 julia = "1.3"
 
+[extensions]
+DistributionsChainRulesCoreExt = "ChainRulesCore"
+DistributionsDensityInterfaceExt = "DensityInterface"
+
 [extras]
 Calculus = "49dc2e85-a5d0-5ad3-a950-438e2897f1b9"
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
+DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000"
 ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
@@ -43,4 +51,8 @@ StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["StableRNGs", "Calculus", "ChainRulesTestUtils", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test", "OffsetArrays"]
+test = ["StableRNGs", "Calculus", "ChainRulesCore", "ChainRulesTestUtils", "DensityInterface", "Distributed", "FiniteDifferences", "ForwardDiff", "JSON", "StaticArrays", "Test", "OffsetArrays"]
+
+[weakdeps]
+ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
+DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
diff --git a/README.md b/README.md
@@ -13,7 +13,7 @@ A Julia package for probability distributions and associated functions. Particul
 * Moments (e.g., mean, variance, skewness, and kurtosis), entropy, and other properties
 * Probability density/mass functions (pdf) and their logarithm (logpdf)
 * Moment generating functions and characteristic functions
-* Sampling from population or from a distribution
+* Sampling from a population or from a distribution
 * Maximum likelihood estimation
 
 **Note:** The functionalities related to conjugate priors have been moved to the [ConjugatePriors package](https://github.com/JuliaStats/ConjugatePriors.jl).
@@ -32,7 +32,7 @@ Also, for casual conversation and quick questions, there are the channels `#help
 
 ### Reporting issues
 
-* If you need help or an explanation how to use *Distributions* ask in the forum (https://discourse.julialang.org) or, for informal questions, visit the chat (https://julialang.slack.com).
+* If you need help or an explanation of how to use *Distributions* ask in the forum (https://discourse.julialang.org) or, for informal questions, visit the chat (https://julialang.slack.com).
 
 If you have a bug linked with *Distributions*, check that it has
 not been reported yet on the issues of the repository.
@@ -42,8 +42,8 @@ which you can get with this command in the Julia REPL:
 julia> ]status Distributions
 ```
 
-Be exhaustive in your report, give the summary of the bug,
-a Minimal Working Example (MWE), what happens and what you
+Be exhaustive in your report: summarize the bug and provide
+a Minimal Working Example (MWE), what happens, and what you
 expected to happen.
 
 ### Workflow with Git and GitHub
@@ -61,7 +61,7 @@ the following are required for contributions to be accepted:
 1. Docstrings must be added to all interface and non-trivial functions.
 2. Tests validating the modified behavior in the `test` folder. If new test files are added, do not forget to add them in `test/runtests.jl`. Cover possible edge cases. Run the tests locally before submitting the PR.
 3. At the end of the tests, `Test.detect_ambiguities(Distributions)` is run to check method ambiguities. Verify that your modified code did not yield method ambiguities.
-4. Make according modifications to the `docs` folder, build the documentation locally and verify that your modifications display correctly and did not yield warnings. To build the documentation locally, you first need to instantiate the `docs/` project:
+4. Make corresponding modifications to the `docs` folder, build the documentation locally and verify that your modifications display correctly and did not yield warnings. To build the documentation locally, you first need to instantiate the `docs/` project:
 
        julia --project=docs/
        pkg> instantiate

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -4,4 +4,4 @@ GR = "28b8d3ca-fb5f-59d9-8090-bfdbd6d07a71"
 
 [compat]
 Documenter = "0.26, 0.27"
-GR = "0.61, 0.62, 0.63, 0.64"
+GR = "0.72.1"
diff --git a/docs/make.jl b/docs/make.jl
@@ -17,6 +17,7 @@ makedocs(
         "reshape.md",
         "cholesky.md",
         "mixture.md",
+        "order_statistics.md",
         "convolution.md",
         "fit.md",
         "extends.md",

diff --git a/docs/src/extends.md b/docs/src/extends.md
@@ -1,21 +1,21 @@
 # Create New Samplers and Distributions
 
-Whereas this package already provides a large collection of common distributions out of box, there are still occasions where you want to create new distributions (*e.g* your application requires a special kind of distributions, or you want to contribute to this package).
+Whereas this package already provides a large collection of common distributions out of the box, there are still occasions where you want to create new distributions (*e.g.* your application requires a special kind of distribution, or you want to contribute to this package).
 
 Generally, you don't have to implement every API method listed in the documentation. This package provides a series of generic functions that turn a small number of internal methods into user-end API methods. What you need to do is to implement this small set of internal methods for your distributions.
 
-By default, `Discrete` sampleables have support of type `Int` while `Continuous` sampleables have support of type `Float64`. If this assumption does not hold for your new distribution or sampler, or its `ValueSupport` is neither `Discrete` nor `Continuous`, you should implement the `eltype` method in addition to the other methods listed below.
+By default, `Discrete` sampleables have support of type `Int` while `Continuous` sampleables have support of type `Float64`. If this assumption does not hold for your new distribution or sampler, or its `ValueSupport` is neither `Discrete` nor `Continuous`, you should implement the `eltype` method in addition to the other methods listed below.
 
-**Note:** the methods need to be implemented are different for distributions of different variate forms.
+**Note:** The methods that need to be implemented are different for distributions of different variate forms.
 
 
 ## Create a Sampler
 
-Unlike a full fledged distributions, a sampler, in general, only provides limited functionalities, mainly to support sampling.
+Unlike full-fledged distributions, a sampler, in general, only provides limited functionalities, mainly to support sampling.
 
 ### Univariate Sampler
 
-To implement a univariate sampler, one can define a sub type (say `Spl`) of `Sampleable{Univariate,S}` (where `S` can be `Discrete` or `Continuous`), and provide a `rand` method, as
+To implement a univariate sampler, one can define a subtype (say `Spl`) of `Sampleable{Univariate,S}` (where `S` can be `Discrete` or `Continuous`), and provide a `rand` method, as
 
 ```julia
 function rand(rng::AbstractRNG, s::Spl)
@@ -27,7 +27,7 @@ The package already implements a vectorized version of `rand!` and `rand` that r
 
 ### Multivariate Sampler
 
-To implement a multivariate sampler, one can define a sub type of `Sampleable{Multivariate,S}`, and provide both `length` and `_rand!` methods, as
+To implement a multivariate sampler, one can define a subtype of `Sampleable{Multivariate,S}`, and provide both `length` and `_rand!` methods, as
 
 ```julia
 Base.length(s::Spl) = ... # return the length of each sample
@@ -68,7 +68,7 @@ rand(rng::AbstractRNG, s::Sampleable{Multivariate,S}, n::Int) where {S<:ValueSup
     _rand!(rng, s, Matrix{eltype(S)}(length(s), n))
 ```
 
-If there is a more efficient method to generate multiple vector samples in batch, one should provide the following method
+If there is a more efficient method to generate multiple vector samples in a batch, one should provide the following method
 
 ```julia
 function _rand!(rng::AbstractRNG, s::Spl, A::DenseMatrix{T}) where T<:Real
@@ -80,7 +80,7 @@ Remember that each *column* of A is a sample.
 
 ### Matrix-variate Sampler
 
-To implement a multivariate sampler, one can define a sub type of `Sampleable{Multivariate,S}`, and provide both `size` and `_rand!` method, as
+To implement a matrix-variate sampler, one can define a subtype of `Sampleable{Matrixvariate,S}`, and provide both `size` and `_rand!` methods, as
 
 ```julia
 Base.size(s::Spl) = ... # the size of each matrix sample
@@ -104,7 +104,7 @@ sampler(d::Distribution)
 
 A univariate distribution type should be defined as a subtype of `DiscreteUnivariateDistribution` or `ContinuousUnivariateDistribution`.
 
-Following methods need to be implemented for each univariate distribution type:
+The following methods need to be implemented for each univariate distribution type:
 
 - [`rand(::AbstractRNG, d::UnivariateDistribution)`](@ref)
 - [`sampler(d::Distribution)`](@ref)
@@ -134,15 +134,15 @@ You may refer to the source file `src/univariates.jl` to see details about how g
 
 A multivariate distribution type should be defined as a subtype of `DiscreteMultivariateDistribution` or `ContinuousMultivariateDistribution`.
 
-Following methods need to be implemented for each multivariate distribution type:
+The following methods need to be implemented for each multivariate distribution type:
 
 - [`length(d::MultivariateDistribution)`](@ref)
 - [`sampler(d::Distribution)`](@ref)
 - [`eltype(d::Distribution)`](@ref)
 - [`Distributions._rand!(::AbstractRNG, d::MultivariateDistribution, x::AbstractArray)`](@ref)
 - [`Distributions._logpdf(d::MultivariateDistribution, x::AbstractArray)`](@ref)
 
-Note that if there exists faster methods for batch evaluation, one should override `_logpdf!` and `_pdf!`.
+Note that if there exist faster methods for batch evaluation, one should override `_logpdf!` and `_pdf!`.
 
 Furthermore, the generic `loglikelihood` function repeatedly calls `_logpdf`. If there is
 a better way to compute the log-likelihood, one should override `loglikelihood`.
@@ -154,13 +154,13 @@ It is also recommended that one also implements the following statistics functio
 - [`entropy(d::MultivariateDistribution)`](@ref)
 - [`cov(d::MultivariateDistribution)`](@ref)
 
-## Create a Matrix-variate Distribution
+## Create a Matrix-Variate Distribution
 
-A multivariate distribution type should be defined as a subtype of `DiscreteMatrixDistribution` or `ContinuousMatrixDistribution`.
+A matrix-variate distribution type should be defined as a subtype of `DiscreteMatrixDistribution` or `ContinuousMatrixDistribution`.
 
-Following methods need to be implemented for each matrix-variate distribution type:
+The following methods need to be implemented for each matrix-variate distribution type:
 
 - [`size(d::MatrixDistribution)`](@ref)
-- [`rand(d::MatrixDistribution)`](@ref)
+- [`Distributions._rand!(rng::AbstractRNG, d::MatrixDistribution, A::AbstractMatrix)`](@ref)
 - [`sampler(d::MatrixDistribution)`](@ref)
 - [`Distributions._logpdf(d::MatrixDistribution, x::AbstractArray)`](@ref)
diff --git a/docs/src/fit.md b/docs/src/fit.md
@@ -10,7 +10,7 @@ This statement fits a distribution of type `D` to a given dataset `x`, where `x`
 
 !!! note
 
-    One can use as first argument simply the distribution name, like `Binomial`,
+    One can simply use the distribution name as the first argument, like `Binomial`,
     or a concrete distribution with a type parameter, like `Normal{Float64}` or
     `Exponential{Float32}`.  However, in the latter case the type parameter of
     the distribution will be ignored:
@@ -61,7 +61,7 @@ The `fit_mle` method has been implemented for the following distributions:
 - [`MvNormal`](@ref)
 - [`Dirichlet`](@ref)
 
-For most of these distributions, the usage is as described above. For a few special distributions that require additional information for estimation, we have to use modified interface:
+For most of these distributions, the usage is as described above. For a few special distributions that require additional information for estimation, we have to use a modified interface:
 
 ```julia
 fit_mle(Binomial, n, x)        # n is the number of trials in each experiment
@@ -76,7 +76,7 @@ fit_mle(Categorical, x, w)
 
 ## Sufficient Statistics
 
-For many distributions, estimation can be based on (sum of) sufficient statistics computed from a dataset. To simplify implementation, for such distributions, we implement `suffstats` method instead of `fit_mle` directly:
+For many distributions, estimation can be based on (the sum of) sufficient statistics computed from a dataset. To simplify implementation, for such distributions, we implement the `suffstats` method instead of `fit_mle` directly:
 
 ```julia
 ss = suffstats(D, x)        # ss captures the sufficient statistics of x

diff --git a/docs/src/mixture.md b/docs/src/mixture.md
@@ -1,6 +1,6 @@
 # Mixture Models
 
-A [mixture model](http://en.wikipedia.org/wiki/Mixture_model) is a probabilistic distribution that combines a set of *component* to represent the overall distribution. Generally, the probability density/mass function is given by a convex combination of the pdf/pmf of individual components, as
+A [mixture model](http://en.wikipedia.org/wiki/Mixture_model) is a probability distribution that combines a set of *components* to represent the overall distribution. Generally, the probability density/mass function is given by a convex combination of the pdf/pmf of individual components, as
 
 ```math
 f_{mix}(x; \Theta, \pi) = \sum_{k=1}^K \pi_k f(x; \theta_k)
@@ -27,7 +27,7 @@ const MultivariateMixture  = AbstractMixtureModel{Multivariate}
 
 **Remarks:**
 
-- We introduce `AbstractMixtureModel` as a base type, which allows one to define a mixture model with different internal implementation, while still being able to leverage the common methods defined for `AbstractMixtureModel`.
+- We introduce `AbstractMixtureModel` as a base type, which allows one to define a mixture model with different internal implementations, while still being able to leverage the common methods defined for `AbstractMixtureModel`.
 
 ```@docs
 AbstractMixtureModel
@@ -105,5 +105,5 @@ rand!(::AbstractMixtureModel, ::AbstractArray)
 
 ## Estimation
 
-There are a number of methods for estimating of mixture models from data, and this problem remains an open research topic.
+There are several methods for the estimation of mixture models from data, and this problem remains an open research topic.
 This package does not provide facilities for estimating mixture models. One can resort to other packages, *e.g.* [*GaussianMixtures.jl*](https://github.com/davidavdav/GaussianMixtures.jl), for this purpose.
diff --git a/docs/src/multivariate.md b/docs/src/multivariate.md
@@ -11,7 +11,7 @@ const ContinuousMultivariateDistribution = Distribution{Multivariate, Continuous
 
 ## Common Interface
 
-The methods listed as below are implemented for each multivariate distribution, which provides a consistent interface to work with multivariate distributions.
+The methods listed below are implemented for each multivariate distribution, which provides a consistent interface to work with multivariate distributions.
 
 ### Computation of statistics
 
@@ -35,7 +35,7 @@ pdf(::MultivariateDistribution, ::AbstractArray)
 logpdf(::MultivariateDistribution, ::AbstractArray)
 loglikelihood(::MultivariateDistribution, ::AbstractVector{<:Real})
 ```
-**Note:** For multivariate distributions, the pdf value is usually very small or large, and therefore direct evaluating the pdf may cause numerical problems. It is generally advisable to perform probability computation in log-scale.
+**Note:** For multivariate distributions, the pdf value is usually very small or large, and therefore direct evaluation of the pdf may cause numerical problems. It is generally advisable to perform probability computation in log scale.
 
 
 ### Sampling
@@ -45,7 +45,7 @@ rand(rng::AbstractRNG, ::MultivariateDistribution)
 rand!(rng::AbstractRNG, d::MultivariateDistribution, x::AbstractArray)
 ```
 
-**Note:** In addition to these common methods, each multivariate distribution has its own special methods, as introduced below.
+**Note:** In addition to these common methods, each multivariate distribution has its own special methods, as introduced below.
 
 
 ## Distributions
@@ -98,7 +98,7 @@ scale!{D<:Distributions.AbstractMvLogNormal}(::Type{D},s::Symbol,m::AbstractVect
 params{D<:Distributions.AbstractMvLogNormal}(::Type{D},m::AbstractVector,S::AbstractMatrix)
 ```
 
-## Internal Methods (for creating you own multivariate distribution)
+## Internal Methods (for creating your own multivariate distribution)
 
 ```@docs
 Distributions._logpdf(d::MultivariateDistribution, x::AbstractArray)

diff --git a/docs/src/order_statistics.md b/docs/src/order_statistics.md
@@ -0,0 +1,16 @@
+# Order Statistics
+
+The $i$th [Order Statistic](https://en.wikipedia.org/wiki/Order_statistic) of a random sample of size $n$ from a univariate distribution is the $i$th element after sorting in increasing order.
+As a special case, the first and $n$th order statistics are the minimum and maximum of the sample, while for odd $n$, the $\lceil \frac{n}{2} \rceil$th entry is the sample median.
+
+Given any univariate distribution and the sample size $n$, we can construct the distribution of its $i$th order statistic:
+
+```@docs
+OrderStatistic
+```
+
+If we are interested in more than one order statistic, for continuous univariate distributions we can also construct the joint distribution of order statistics:
+
+```@docs
+JointOrderStatistics
+```