STATISTICS-88: Add a log uniform distribution

apache · Aug 16, 2024 · 7c6a763 · 7c6a763
1 parent 48e1853
commit 7c6a763
Show file tree

Hide file tree

Showing 8 changed files with 611 additions and 0 deletions.
diff --git a/...tion/src/main/java/org/apache/commons/statistics/distribution/LogUniformDistribution.java b/...tion/src/main/java/org/apache/commons/statistics/distribution/LogUniformDistribution.java
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.distribution.ContinuousUniformSampler;
+import org.apache.commons.rng.sampling.distribution.SharedStateContinuousSampler;
+
+/**
+ * Implementation of the log-uniform distribution. This is also known as the reciprocal distribution.
+ *
+ * <p>The probability density function of \( X \) is:
+ *
+ * <p>\[ f(x; a, b) = \frac{1}{x \ln \frac b a} \]
+ *
+ * <p>for \( 0 \lt a \lt b \lt \infty \) and
+ * \( x \in [a, b] \).
+ *
+ * @see <a href="https://en.wikipedia.org/wiki/Reciprocal_distribution">Reciprocal distribution (Wikipedia)</a>
+ * @since 1.1
+ */
+public final class LogUniformDistribution extends AbstractContinuousDistribution {
+    /** Lower bound (a) of this distribution (inclusive). */
+    private final double lower;
+    /** Upper bound (b) of this distribution (exclusive). */
+    private final double upper;
+    /** log(a). */
+    private final double logA;
+    /** log(b). */
+    private final double logB;
+    /** log(b) - log(a). */
+    private final double logBmLogA;
+    /** log(log(b) - log(a)). */
+    private final double logLogBmLogA;
+
+    /**
+     * @param lower Lower bound of this distribution (inclusive).
+     * @param upper Upper bound of this distribution (inclusive).
+     */
+    private LogUniformDistribution(double lower,
+                                   double upper) {
+        this.lower = lower;
+        this.upper = upper;
+        logA = Math.log(lower);
+        logB = Math.log(upper);
+        logBmLogA = logB - logA;
+        logLogBmLogA = Math.log(logBmLogA);
+    }
+
+    /**
+     * Creates a log-uniform distribution.
+     *
+     * @param lower Lower bound of this distribution (inclusive).
+     * @param upper Upper bound of this distribution (inclusive).
+     * @return the distribution
+     * @throws IllegalArgumentException if {@code lower >= upper}; the range between the bounds
+     * is not finite; or {@code lower <= 0}
+     */
+    public static LogUniformDistribution of(double lower,
+                                            double upper) {
+        if (lower >= upper) {
+            throw new DistributionException(DistributionException.INVALID_RANGE_LOW_GTE_HIGH,
+                                            lower, upper);
+        }
+        if (!Double.isFinite(upper - lower)) {
+            throw new DistributionException("Range %s is not finite", upper - lower);
+        }
+        if (lower <= 0) {
+            throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE, lower);
+        }
+        return new LogUniformDistribution(lower, upper);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double density(double x) {
+        if (x < lower || x > upper) {
+            return 0;
+        }
+        return Math.exp(logDensity(x));
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double logDensity(double x) {
+        if (x < lower || x > upper) {
+            return Double.NEGATIVE_INFINITY;
+        }
+        return -Math.log(x) - logLogBmLogA;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double cumulativeProbability(double x)  {
+        if (x <= lower) {
+            return 0;
+        }
+        if (x >= upper) {
+            return 1;
+        }
+        return (Math.log(x) - logA) / logBmLogA;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double survivalProbability(double x) {
+        if (x <= lower) {
+            return 1;
+        }
+        if (x >= upper) {
+            return 0;
+        }
+        return (logB - Math.log(x)) / logBmLogA;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public double inverseCumulativeProbability(double p) {
+        ArgumentUtils.checkProbability(p);
+        // Avoid floating-point error at the bounds
+        return clipToRange(Math.exp(logA + p * logBmLogA));
+    }
+
+    @Override
+    public double inverseSurvivalProbability(double p) {
+        ArgumentUtils.checkProbability(p);
+        // Avoid floating-point error at the bounds
+        return clipToRange(Math.exp(logB - p * logBmLogA));
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>For lower bound \( a \) and upper bound \( b \), the mean is:
+     *
+     * <p>\[ \frac{b - a}{\ln \frac b a} \]
+     */
+    @Override
+    public double getMean() {
+        return (upper - lower) / logBmLogA;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>For lower bound \( a \) and upper bound \( b \), the variance is:
+     *
+     * <p>\[ \frac{b^2 - a^2}{2 \ln \frac b a} - \left( \frac{b - a}{\ln \frac b a} \right)^2 \]
+     */
+    @Override
+    public double getVariance() {
+        // Compute u_2 via a stabilising rearrangement:
+        // https://docs.scipy.org/doc/scipy/tutorial/stats/continuous_loguniform.html
+        final double a = lower;
+        final double b = upper;
+        final double d = -logBmLogA;
+        return (a - b) * (a * (d - 2) + b * (d + 2)) / (2 * d * d);
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The lower bound of the support is equal to the lower bound parameter
+     * of the distribution.
+     */
+    @Override
+    public double getSupportLowerBound() {
+        return lower;
+    }
+
+    /**
+     * {@inheritDoc}
+     *
+     * <p>The upper bound of the support is equal to the upper bound parameter
+     * of the distribution.
+     */
+    @Override
+    public double getSupportUpperBound() {
+        return upper;
+    }
+
+    /**
+     * Clip the value to the range [lower, upper].
+     * This is used to handle floating-point error at the support bound.
+     *
+     * @param x Value x
+     * @return x clipped to the range
+     */
+    private double clipToRange(double x) {
+        return clip(x, lower, upper);
+    }
+
+    /**
+     * Clip the value to the range [lower, upper].
+     *
+     * @param x Value x
+     * @param lower Lower bound (inclusive)
+     * @param upper Upper bound (inclusive)
+     * @return x clipped to the range
+     */
+    private static double clip(double x, double lower, double upper) {
+        if (x <= lower) {
+            return lower;
+        }
+        return x < upper ? x : upper;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    double getMedian() {
+        // Overridden for the probability(double, double) method.
+        // This is intentionally not a public method.
+        // sqrt(ab) avoiding overflow
+        return Math.exp(0.5 * (logA + logB));
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public ContinuousDistribution.Sampler createSampler(final UniformRandomProvider rng) {
+        // Exponentiate a uniform distribution sampler of the logarithmic range.
+        final SharedStateContinuousSampler s = ContinuousUniformSampler.of(rng, logA, logB);
+        return () -> Math.exp(s.sample());
+    }
+}
diff --git a/.../src/test/java/org/apache/commons/statistics/distribution/LogUniformDistributionTest.java b/.../src/test/java/org/apache/commons/statistics/distribution/LogUniformDistributionTest.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.distribution;
+
+import java.util.stream.Stream;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.CsvSource;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test cases for {@link LogUniformDistribution}.
+ * Extends {@link BaseContinuousDistributionTest}. See javadoc of that class for details.
+ */
+class LogUniformDistributionTest extends BaseContinuousDistributionTest {
+    @Override
+    ContinuousDistribution makeDistribution(Object... parameters) {
+        final double a = (Double) parameters[0];
+        final double b = (Double) parameters[1];
+        return LogUniformDistribution.of(a, b);
+    }
+
+    @Override
+    Object[][] makeInvalidParameters() {
+        return new Object[][] {
+            // lower >= upper
+            {0.0, 0.0},
+            {1.0, 0.5},
+            // Range not finite
+            {Double.NaN, 1.0},
+            {0.5, Double.NaN},
+            // lower <= 0
+            {-1.0, 1.0},
+            {0.0, 1.0},
+        };
+    }
+
+    @Override
+    String[] getParameterNames() {
+        return new String[] {"SupportLowerBound", "SupportUpperBound"};
+    }
+
+    @Override
+    protected double getRelativeTolerance() {
+        return 5e-15;
+    }
+
+    //-------------------- Additional test cases -------------------------------
+
+    /**
+     * Test the moments using the canonical formulas (from the javadoc).
+     */
+    @ParameterizedTest
+    @MethodSource
+    void testAdditionalMoments(double a, double b) {
+        final double diff = b - a;
+        final double denom = Math.log(b / a);
+        final double mean = diff / denom;
+        // Note: b^2 - a^2 = (b-a)*(b+a)
+        final double variance = mean * (b + a) / 2 - mean * mean;
+        TestUtils.assertEquals(mean, LogUniformDistribution.of(a, b).getMean(),
+            DoubleTolerances.relative(1e-14), "Mean");
+        TestUtils.assertEquals(variance, LogUniformDistribution.of(a, b).getVariance(),
+            DoubleTolerances.relative(1e-10), "Variance");
+    }
+
+    static Stream<Arguments> testAdditionalMoments() {
+        final Stream.Builder<Arguments> builder = Stream.builder();
+        for (final double a : new double[] {1, 10, 100}) {
+            for (final double x : new double[] {10, 20, 40}) {
+                builder.add(Arguments.of(a, a + x));
+            }
+        }
+        return builder.build();
+    }
+
+    /**
+     * Test the survival function for extreme values is computed using high precision,
+     * i.e. is not the default of 1 - CDF.
+     */
+    @ParameterizedTest
+    @CsvSource({
+        "1e-100, 1e100",
+        "1e-10, 1e10",
+    })
+    void testExtremeSurvivalFunction(double a, double b) {
+        final LogUniformDistribution d = LogUniformDistribution.of(a, b);
+        // Find a small non-zero survival probability
+        final double u = Math.ulp(b);
+        int i = 1;
+        double p;
+        do {
+            p = d.survivalProbability(b - i * u);
+            i *= 2;
+        } while (p == 0);
+        Assertions.assertTrue(p < 0x1.0p-53, "sf is not small enough for high precision");
+        Assertions.assertNotEquals(1 - d.cumulativeProbability(b - i * u), p, "sf is not high precision: sf == 1 - cdf");
+    }
+}
diff --git a/...rc/test/resources/org/apache/commons/statistics/distribution/test.loguniform.1.properties b/...rc/test/resources/org/apache/commons/statistics/distribution/test.loguniform.1.properties
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+parameters = 1.0 10.0
+
+# Computed using scipy.stats reciprocal (v1.13.0)
+mean = 3.9086503371292665
+variance = 6.220029396270247
+lower = 1
+upper = 10
+cdf.points = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10
+cdf.values = \
+  0.                 , 0.30102999566398114, 0.47712125471966244, \
+  0.6020599913279623 , 0.6989700043360187 , 0.7781512503836435 , \
+  0.8450980400142567 , 0.9030899869919434 , 0.9542425094393249 , \
+  1.
+pdf.values = \
+  0.43429448190325176 , 0.21714724095162585 , 0.14476482730108392 , \
+  0.10857362047581297 , 0.08685889638065038 , 0.07238241365054196 , \
+  0.062042068843321696, 0.05428681023790648 , 0.04825494243369463 , \
+  0.04342944819032517