-
Notifications
You must be signed in to change notification settings - Fork 28
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
STATISTICS-88: Add a log uniform distribution
- Loading branch information
Showing
8 changed files
with
611 additions
and
0 deletions.
There are no files selected for viewing
239 changes: 239 additions & 0 deletions
239
...tion/src/main/java/org/apache/commons/statistics/distribution/LogUniformDistribution.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.commons.statistics.distribution; | ||
|
||
import org.apache.commons.rng.UniformRandomProvider; | ||
import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler; | ||
import org.apache.commons.rng.sampling.distribution.SharedStateContinuousSampler; | ||
|
||
/** | ||
* Implementation of the log-uniform distribution. This is also known as the reciprocal distribution. | ||
* | ||
* <p>The probability density function of \( X \) is: | ||
* | ||
* <p>\[ f(x; a, b) = \frac{1}{x \ln \frac b a} \] | ||
* | ||
* <p>for \( 0 \lt a \lt b \lt \infty \) and | ||
* \( x \in [a, b] \). | ||
* | ||
* @see <a href="https://en.wikipedia.org/wiki/Reciprocal_distribution">Reciprocal distribution (Wikipedia)</a> | ||
* @since 1.1 | ||
*/ | ||
public final class LogUniformDistribution extends AbstractContinuousDistribution { | ||
/** Lower bound (a) of this distribution (inclusive). */ | ||
private final double lower; | ||
/** Upper bound (b) of this distribution (exclusive). */ | ||
private final double upper; | ||
/** log(a). */ | ||
private final double logA; | ||
/** log(b). */ | ||
private final double logB; | ||
/** log(b) - log(a). */ | ||
private final double logBmLogA; | ||
/** log(log(b) - log(a)). */ | ||
private final double logLogBmLogA; | ||
|
||
/** | ||
* @param lower Lower bound of this distribution (inclusive). | ||
* @param upper Upper bound of this distribution (inclusive). | ||
*/ | ||
private LogUniformDistribution(double lower, | ||
double upper) { | ||
this.lower = lower; | ||
this.upper = upper; | ||
logA = Math.log(lower); | ||
logB = Math.log(upper); | ||
logBmLogA = logB - logA; | ||
logLogBmLogA = Math.log(logBmLogA); | ||
} | ||
|
||
/** | ||
* Creates a log-uniform distribution. | ||
* | ||
* @param lower Lower bound of this distribution (inclusive). | ||
* @param upper Upper bound of this distribution (inclusive). | ||
* @return the distribution | ||
* @throws IllegalArgumentException if {@code lower >= upper}; the range between the bounds | ||
* is not finite; or {@code lower <= 0} | ||
*/ | ||
public static LogUniformDistribution of(double lower, | ||
double upper) { | ||
if (lower >= upper) { | ||
throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GTE_HIGH, | ||
lower, upper); | ||
} | ||
if (!Double.isFinite(upper - lower)) { | ||
throw new DistributionException("Range %s is not finite", upper - lower); | ||
} | ||
if (lower <= 0) { | ||
throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, lower); | ||
} | ||
return new LogUniformDistribution(lower, upper); | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public double density(double x) { | ||
if (x < lower || x > upper) { | ||
return 0; | ||
} | ||
return Math.exp(logDensity(x)); | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public double logDensity(double x) { | ||
if (x < lower || x > upper) { | ||
return Double.NEGATIVE_INFINITY; | ||
} | ||
return -Math.log(x) - logLogBmLogA; | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public double cumulativeProbability(double x) { | ||
if (x <= lower) { | ||
return 0; | ||
} | ||
if (x >= upper) { | ||
return 1; | ||
} | ||
return (Math.log(x) - logA) / logBmLogA; | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public double survivalProbability(double x) { | ||
if (x <= lower) { | ||
return 1; | ||
} | ||
if (x >= upper) { | ||
return 0; | ||
} | ||
return (logB - Math.log(x)) / logBmLogA; | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public double inverseCumulativeProbability(double p) { | ||
ArgumentUtils.checkProbability(p); | ||
// Avoid floating-point error at the bounds | ||
return clipToRange(Math.exp(logA + p * logBmLogA)); | ||
} | ||
|
||
@Override | ||
public double inverseSurvivalProbability(double p) { | ||
ArgumentUtils.checkProbability(p); | ||
// Avoid floating-point error at the bounds | ||
return clipToRange(Math.exp(logB - p * logBmLogA)); | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
* | ||
* <p>For lower bound \( a \) and upper bound \( b \), the mean is: | ||
* | ||
* <p>\[ \frac{b - a}{\ln \frac b a} \] | ||
*/ | ||
@Override | ||
public double getMean() { | ||
return (upper - lower) / logBmLogA; | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
* | ||
* <p>For lower bound \( a \) and upper bound \( b \), the variance is: | ||
* | ||
* <p>\[ \frac{b^2 - a^2}{2 \ln \frac b a} - \left( \frac{b - a}{\ln \frac b a} \right)^2 \] | ||
*/ | ||
@Override | ||
public double getVariance() { | ||
// Compute u_2 via a stabilising rearrangement: | ||
// https://docs.scipy.org/doc/scipy/tutorial/stats/continuous_loguniform.html | ||
final double a = lower; | ||
final double b = upper; | ||
final double d = -logBmLogA; | ||
return (a - b) * (a * (d - 2) + b * (d + 2)) / (2 * d * d); | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
* | ||
* <p>The lower bound of the support is equal to the lower bound parameter | ||
* of the distribution. | ||
*/ | ||
@Override | ||
public double getSupportLowerBound() { | ||
return lower; | ||
} | ||
|
||
/** | ||
* {@inheritDoc} | ||
* | ||
* <p>The upper bound of the support is equal to the upper bound parameter | ||
* of the distribution. | ||
*/ | ||
@Override | ||
public double getSupportUpperBound() { | ||
return upper; | ||
} | ||
|
||
/** | ||
* Clip the value to the range [lower, upper]. | ||
* This is used to handle floating-point error at the support bound. | ||
* | ||
* @param x Value x | ||
* @return x clipped to the range | ||
*/ | ||
private double clipToRange(double x) { | ||
return clip(x, lower, upper); | ||
} | ||
|
||
/** | ||
* Clip the value to the range [lower, upper]. | ||
* | ||
* @param x Value x | ||
* @param lower Lower bound (inclusive) | ||
* @param upper Upper bound (inclusive) | ||
* @return x clipped to the range | ||
*/ | ||
private static double clip(double x, double lower, double upper) { | ||
if (x <= lower) { | ||
return lower; | ||
} | ||
return x < upper ? x : upper; | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
double getMedian() { | ||
// Overridden for the probability(double, double) method. | ||
// This is intentionally not a public method. | ||
// sqrt(ab) avoiding overflow | ||
return Math.exp(0.5 * (logA + logB)); | ||
} | ||
|
||
/** {@inheritDoc} */ | ||
@Override | ||
public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) { | ||
// Exponentiate a uniform distribution sampler of the logarithmic range. | ||
final SharedStateContinuousSampler s = ContinuousUniformSampler.of(rng, logA, logB); | ||
return () -> Math.exp(s.sample()); | ||
} | ||
} |
115 changes: 115 additions & 0 deletions
115
.../src/test/java/org/apache/commons/statistics/distribution/LogUniformDistributionTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.commons.statistics.distribution; | ||
|
||
import java.util.stream.Stream; | ||
import org.junit.jupiter.api.Assertions; | ||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.Arguments; | ||
import org.junit.jupiter.params.provider.CsvSource; | ||
import org.junit.jupiter.params.provider.MethodSource; | ||
|
||
/** | ||
* Test cases for {@link LogUniformDistribution}. | ||
* Extends {@link BaseContinuousDistributionTest}. See javadoc of that class for details. | ||
*/ | ||
class LogUniformDistributionTest extends BaseContinuousDistributionTest { | ||
@Override | ||
ContinuousDistribution makeDistribution(Object... parameters) { | ||
final double a = (Double) parameters[0]; | ||
final double b = (Double) parameters[1]; | ||
return LogUniformDistribution.of(a, b); | ||
} | ||
|
||
@Override | ||
Object[][] makeInvalidParameters() { | ||
return new Object[][] { | ||
// lower >= upper | ||
{0.0, 0.0}, | ||
{1.0, 0.5}, | ||
// Range not finite | ||
{Double.NaN, 1.0}, | ||
{0.5, Double.NaN}, | ||
// lower <= 0 | ||
{-1.0, 1.0}, | ||
{0.0, 1.0}, | ||
}; | ||
} | ||
|
||
@Override | ||
String[] getParameterNames() { | ||
return new String[] {"SupportLowerBound", "SupportUpperBound"}; | ||
} | ||
|
||
@Override | ||
protected double getRelativeTolerance() { | ||
return 5e-15; | ||
} | ||
|
||
//-------------------- Additional test cases ------------------------------- | ||
|
||
/** | ||
* Test the moments using the canonical formulas (from the javadoc). | ||
*/ | ||
@ParameterizedTest | ||
@MethodSource | ||
void testAdditionalMoments(double a, double b) { | ||
final double diff = b - a; | ||
final double denom = Math.log(b / a); | ||
final double mean = diff / denom; | ||
// Note: b^2 - a^2 = (b-a)*(b+a) | ||
final double variance = mean * (b + a) / 2 - mean * mean; | ||
TestUtils.assertEquals(mean, LogUniformDistribution.of(a, b).getMean(), | ||
DoubleTolerances.relative(1e-14), "Mean"); | ||
TestUtils.assertEquals(variance, LogUniformDistribution.of(a, b).getVariance(), | ||
DoubleTolerances.relative(1e-10), "Variance"); | ||
} | ||
|
||
static Stream<Arguments> testAdditionalMoments() { | ||
final Stream.Builder<Arguments> builder = Stream.builder(); | ||
for (final double a : new double[] {1, 10, 100}) { | ||
for (final double x : new double[] {10, 20, 40}) { | ||
builder.add(Arguments.of(a, a + x)); | ||
} | ||
} | ||
return builder.build(); | ||
} | ||
|
||
/** | ||
* Test the survival function for extreme values is computed using high precision, | ||
* i.e. is not the default of 1 - CDF. | ||
*/ | ||
@ParameterizedTest | ||
@CsvSource({ | ||
"1e-100, 1e100", | ||
"1e-10, 1e10", | ||
}) | ||
void testExtremeSurvivalFunction(double a, double b) { | ||
final LogUniformDistribution d = LogUniformDistribution.of(a, b); | ||
// Find a small non-zero survival probability | ||
final double u = Math.ulp(b); | ||
int i = 1; | ||
double p; | ||
do { | ||
p = d.survivalProbability(b - i * u); | ||
i *= 2; | ||
} while (p == 0); | ||
Assertions.assertTrue(p < 0x1.0p-53, "sf is not small enough for high precision"); | ||
Assertions.assertNotEquals(1 - d.cumulativeProbability(b - i * u), p, "sf is not high precision: sf == 1 - cdf"); | ||
} | ||
} |
33 changes: 33 additions & 0 deletions
33
...rc/test/resources/org/apache/commons/statistics/distribution/test.loguniform.1.properties
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one or more | ||
# contributor license agreements. See the NOTICE file distributed with | ||
# this work for additional information regarding copyright ownership. | ||
# The ASF licenses this file to You under the Apache License, Version 2.0 | ||
# (the "License"); you may not use this file except in compliance with | ||
# the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
parameters = 1.0 10.0 | ||
|
||
# Computed using scipy.stats reciprocal (v1.13.0) | ||
mean = 3.9086503371292665 | ||
variance = 6.220029396270247 | ||
lower = 1 | ||
upper = 10 | ||
cdf.points = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 | ||
cdf.values = \ | ||
0. , 0.30102999566398114, 0.47712125471966244, \ | ||
0.6020599913279623 , 0.6989700043360187 , 0.7781512503836435 , \ | ||
0.8450980400142567 , 0.9030899869919434 , 0.9542425094393249 , \ | ||
1. | ||
pdf.values = \ | ||
0.43429448190325176 , 0.21714724095162585 , 0.14476482730108392 , \ | ||
0.10857362047581297 , 0.08685889638065038 , 0.07238241365054196 , \ | ||
0.062042068843321696, 0.05428681023790648 , 0.04825494243369463 , \ | ||
0.04342944819032517 |
Oops, something went wrong.