Skip to content

Commit

Permalink
STATISTICS-88: Add a log uniform distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
aherbert committed Aug 16, 2024
1 parent 48e1853 commit 7c6a763
Show file tree
Hide file tree
Showing 8 changed files with 611 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.statistics.distribution;

import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler;
import org.apache.commons.rng.sampling.distribution.SharedStateContinuousSampler;

/**
* Implementation of the log-uniform distribution. This is also known as the reciprocal distribution.
*
* <p>The probability density function of \( X \) is:
*
* <p>\[ f(x; a, b) = \frac{1}{x \ln \frac b a} \]
*
* <p>for \( 0 \lt a \lt b \lt \infty \) and
* \( x \in [a, b] \).
*
* @see <a href="https://en.wikipedia.org/wiki/Reciprocal_distribution">Reciprocal distribution (Wikipedia)</a>
* @since 1.1
*/
public final class LogUniformDistribution extends AbstractContinuousDistribution {
/** Lower bound (a) of this distribution (inclusive). */
private final double lower;
/** Upper bound (b) of this distribution (exclusive). */
private final double upper;
/** log(a). */
private final double logA;
/** log(b). */
private final double logB;
/** log(b) - log(a). */
private final double logBmLogA;
/** log(log(b) - log(a)). */
private final double logLogBmLogA;

/**
* @param lower Lower bound of this distribution (inclusive).
* @param upper Upper bound of this distribution (inclusive).
*/
private LogUniformDistribution(double lower,
double upper) {
this.lower = lower;
this.upper = upper;
logA = Math.log(lower);
logB = Math.log(upper);
logBmLogA = logB - logA;
logLogBmLogA = Math.log(logBmLogA);
}

/**
* Creates a log-uniform distribution.
*
* @param lower Lower bound of this distribution (inclusive).
* @param upper Upper bound of this distribution (inclusive).
* @return the distribution
* @throws IllegalArgumentException if {@code lower >= upper}; the range between the bounds
* is not finite; or {@code lower <= 0}
*/
public static LogUniformDistribution of(double lower,
double upper) {
if (lower >= upper) {
throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GTE_HIGH,
lower, upper);
}
if (!Double.isFinite(upper - lower)) {
throw new DistributionException("Range %s is not finite", upper - lower);
}
if (lower <= 0) {
throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, lower);
}
return new LogUniformDistribution(lower, upper);
}

/** {@inheritDoc} */
@Override
public double density(double x) {
if (x < lower || x > upper) {
return 0;
}
return Math.exp(logDensity(x));
}

/** {@inheritDoc} */
@Override
public double logDensity(double x) {
if (x < lower || x > upper) {
return Double.NEGATIVE_INFINITY;
}
return -Math.log(x) - logLogBmLogA;
}

/** {@inheritDoc} */
@Override
public double cumulativeProbability(double x) {
if (x <= lower) {
return 0;
}
if (x >= upper) {
return 1;
}
return (Math.log(x) - logA) / logBmLogA;
}

/** {@inheritDoc} */
@Override
public double survivalProbability(double x) {
if (x <= lower) {
return 1;
}
if (x >= upper) {
return 0;
}
return (logB - Math.log(x)) / logBmLogA;
}

/** {@inheritDoc} */
@Override
public double inverseCumulativeProbability(double p) {
ArgumentUtils.checkProbability(p);
// Avoid floating-point error at the bounds
return clipToRange(Math.exp(logA + p * logBmLogA));
}

@Override
public double inverseSurvivalProbability(double p) {
ArgumentUtils.checkProbability(p);
// Avoid floating-point error at the bounds
return clipToRange(Math.exp(logB - p * logBmLogA));
}

/**
* {@inheritDoc}
*
* <p>For lower bound \( a \) and upper bound \( b \), the mean is:
*
* <p>\[ \frac{b - a}{\ln \frac b a} \]
*/
@Override
public double getMean() {
return (upper - lower) / logBmLogA;
}

/**
* {@inheritDoc}
*
* <p>For lower bound \( a \) and upper bound \( b \), the variance is:
*
* <p>\[ \frac{b^2 - a^2}{2 \ln \frac b a} - \left( \frac{b - a}{\ln \frac b a} \right)^2 \]
*/
@Override
public double getVariance() {
// Compute u_2 via a stabilising rearrangement:
// https://docs.scipy.org/doc/scipy/tutorial/stats/continuous_loguniform.html
final double a = lower;
final double b = upper;
final double d = -logBmLogA;
return (a - b) * (a * (d - 2) + b * (d + 2)) / (2 * d * d);
}

/**
* {@inheritDoc}
*
* <p>The lower bound of the support is equal to the lower bound parameter
* of the distribution.
*/
@Override
public double getSupportLowerBound() {
return lower;
}

/**
* {@inheritDoc}
*
* <p>The upper bound of the support is equal to the upper bound parameter
* of the distribution.
*/
@Override
public double getSupportUpperBound() {
return upper;
}

/**
* Clip the value to the range [lower, upper].
* This is used to handle floating-point error at the support bound.
*
* @param x Value x
* @return x clipped to the range
*/
private double clipToRange(double x) {
return clip(x, lower, upper);
}

/**
* Clip the value to the range [lower, upper].
*
* @param x Value x
* @param lower Lower bound (inclusive)
* @param upper Upper bound (inclusive)
* @return x clipped to the range
*/
private static double clip(double x, double lower, double upper) {
if (x <= lower) {
return lower;
}
return x < upper ? x : upper;
}

/** {@inheritDoc} */
@Override
double getMedian() {
// Overridden for the probability(double, double) method.
// This is intentionally not a public method.
// sqrt(ab) avoiding overflow
return Math.exp(0.5 * (logA + logB));
}

/** {@inheritDoc} */
@Override
public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
// Exponentiate a uniform distribution sampler of the logarithmic range.
final SharedStateContinuousSampler s = ContinuousUniformSampler.of(rng, logA, logB);
return () -> Math.exp(s.sample());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.statistics.distribution;

import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.CsvSource;
import org.junit.jupiter.params.provider.MethodSource;

/**
* Test cases for {@link LogUniformDistribution}.
* Extends {@link BaseContinuousDistributionTest}. See javadoc of that class for details.
*/
class LogUniformDistributionTest extends BaseContinuousDistributionTest {
@Override
ContinuousDistribution makeDistribution(Object... parameters) {
final double a = (Double) parameters[0];
final double b = (Double) parameters[1];
return LogUniformDistribution.of(a, b);
}

@Override
Object[][] makeInvalidParameters() {
return new Object[][] {
// lower >= upper
{0.0, 0.0},
{1.0, 0.5},
// Range not finite
{Double.NaN, 1.0},
{0.5, Double.NaN},
// lower <= 0
{-1.0, 1.0},
{0.0, 1.0},
};
}

@Override
String[] getParameterNames() {
return new String[] {"SupportLowerBound", "SupportUpperBound"};
}

@Override
protected double getRelativeTolerance() {
return 5e-15;
}

//-------------------- Additional test cases -------------------------------

/**
* Test the moments using the canonical formulas (from the javadoc).
*/
@ParameterizedTest
@MethodSource
void testAdditionalMoments(double a, double b) {
final double diff = b - a;
final double denom = Math.log(b / a);
final double mean = diff / denom;
// Note: b^2 - a^2 = (b-a)*(b+a)
final double variance = mean * (b + a) / 2 - mean * mean;
TestUtils.assertEquals(mean, LogUniformDistribution.of(a, b).getMean(),
DoubleTolerances.relative(1e-14), "Mean");
TestUtils.assertEquals(variance, LogUniformDistribution.of(a, b).getVariance(),
DoubleTolerances.relative(1e-10), "Variance");
}

static Stream<Arguments> testAdditionalMoments() {
final Stream.Builder<Arguments> builder = Stream.builder();
for (final double a : new double[] {1, 10, 100}) {
for (final double x : new double[] {10, 20, 40}) {
builder.add(Arguments.of(a, a + x));
}
}
return builder.build();
}

/**
* Test the survival function for extreme values is computed using high precision,
* i.e. is not the default of 1 - CDF.
*/
@ParameterizedTest
@CsvSource({
"1e-100, 1e100",
"1e-10, 1e10",
})
void testExtremeSurvivalFunction(double a, double b) {
final LogUniformDistribution d = LogUniformDistribution.of(a, b);
// Find a small non-zero survival probability
final double u = Math.ulp(b);
int i = 1;
double p;
do {
p = d.survivalProbability(b - i * u);
i *= 2;
} while (p == 0);
Assertions.assertTrue(p < 0x1.0p-53, "sf is not small enough for high precision");
Assertions.assertNotEquals(1 - d.cumulativeProbability(b - i * u), p, "sf is not high precision: sf == 1 - cdf");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

parameters = 1.0 10.0

# Computed using scipy.stats reciprocal (v1.13.0)
mean = 3.9086503371292665
variance = 6.220029396270247
lower = 1
upper = 10
cdf.points = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
cdf.values = \
0. , 0.30102999566398114, 0.47712125471966244, \
0.6020599913279623 , 0.6989700043360187 , 0.7781512503836435 , \
0.8450980400142567 , 0.9030899869919434 , 0.9542425094393249 , \
1.
pdf.values = \
0.43429448190325176 , 0.21714724095162585 , 0.14476482730108392 , \
0.10857362047581297 , 0.08685889638065038 , 0.07238241365054196 , \
0.062042068843321696, 0.05428681023790648 , 0.04825494243369463 , \
0.04342944819032517
Loading

0 comments on commit 7c6a763

Please sign in to comment.