Skip to content

Commit

Permalink
replaced FastMath and MathUtils log with Math.log, which is faster
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed May 12, 2023
1 parent 223599b commit e325e50
Show file tree
Hide file tree
Showing 14 changed files with 25 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ static double hetLogLikelihood(final AlleleFractionGlobalParameters parameters,
- n * log(majorFraction + minorFraction * lambda0RefMinor);
final double refMinorLogLikelihood = logNotPi + logcRefMinor + Gamma.logGamma(rhoRefMinor) - rhoRefMinor * log(tauRefMinor);

final double outlierLogLikelihood = logPi - FastMath.log(a + r + 1) - CombinatoricsUtils.binomialCoefficientLog(a+r,a);
final double outlierLogLikelihood = logPi - Math.log(a + r + 1) - CombinatoricsUtils.binomialCoefficientLog(a+r,a);
return NaturalLogUtils.logSumExp(altMinorLogLikelihood, refMinorLogLikelihood, outlierLogLikelihood);
}

Expand Down Expand Up @@ -161,6 +161,6 @@ private static double biasPosteriorEffectiveBeta(final double lambda0, final dou
}

private static double log(final double x) {
return FastMath.log(Math.max(EPSILON, x));
return Math.log(Math.max(EPSILON, x));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,11 +162,11 @@ public CopyRatioState.OutlierIndicators sample(final RandomGenerator rng,
final CopyRatioSegmentedData data) {
logger.debug("Sampling outlier indicators...");
final double outlierUnnormalizedLogProbability =
FastMath.log(state.outlierProbability()) + outlierUniformLogLikelihood;
Math.log(state.outlierProbability()) + outlierUniformLogLikelihood;
// final double notOutlierUnnormalizedLogProbabilityPrefactor =
// FastMath.log(1. - state.outlierProbability()) - 0.5 * FastMath.log(2 * Math.PI * state.variance());
// Math.log(1. - state.outlierProbability()) - 0.5 * Math.log(2 * Math.PI * state.variance());
final double notOutlierUnnormalizedLogProbabilityPrefactor =
FastMath.log((1. - state.outlierProbability()) / FastMath.sqrt(2 * Math.PI * state.variance()));
Math.log((1. - state.outlierProbability()) / FastMath.sqrt(2 * Math.PI * state.variance()));
final List<Boolean> indicators = new ArrayList<>(data.getNumPoints());
for (int segmentIndex = 0; segmentIndex < data.getNumSegments(); segmentIndex++) {
final List<CopyRatioSegmentedData.IndexedCopyRatio> indexedCopyRatiosInSegment = data.getIndexedCopyRatiosInSegment(segmentIndex);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,6 @@ private static double calculateHomozygousLogRatio(final AllelicCount allelicCoun
final double betaOneMinusError = Beta.regularizedBeta(1 - genotypingBaseErrorRate, r + 1, n - r + 1);
final double betaHom = betaError + betaAll - betaOneMinusError;
final double betaHet = betaOneMinusError - betaError;
return FastMath.log(betaHom) - FastMath.log(betaHet);
return Math.log(betaHom) - Math.log(betaHet);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ private static double probability(final PileupSummary site, final double contami
}

private static double segmentLogLikelihood(final List<PileupSummary> segment, final double contamination, final double errorRate, final double minorAlleleFraction) {
return segment.stream().mapToDouble(site -> FastMath.log(MathUtils.sum(genotypeLikelihoods(site, contamination, errorRate, minorAlleleFraction)))).sum();
return segment.stream().mapToDouble(site -> Math.log(MathUtils.sum(genotypeLikelihoods(site, contamination, errorRate, minorAlleleFraction)))).sum();
}

private static double modelLogLikelihood(final List<List<PileupSummary>> segments, final double contamination, final double errorRate, final List<Double> mafs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ public static int computeMaxAcceptableAlleleCount(final int ploidy, final int ma
if (ploidy == 1) {
return maxGenotypeCount;
}
final double logMaxGenotypeCount = FastMath.log(maxGenotypeCount);
final double logMaxGenotypeCount = Math.log(maxGenotypeCount);

// Math explanation: genotype count is determined by ${P+A-1 \choose A-1}$, this leads to constraint
// $\log(\frac{(P+A-1)!}{(A-1)!}) \le \log(P!G)$,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,22 +88,22 @@ protected static <EVIDENCE, A extends Allele> double[] computeLog10GenotypeLikel
// biallelic het case: log P(reads | nA copies of A, nB copies of B) = sum_{reads} (log[(nA * P(read | A) + nB * P(read | B))] -log(ploidy))
final double[] log10ReadLks1 = log10LikelihoodsByAlleleAndRead[gac.alleleIndexAt(0)];
final int count1 = gac.alleleCountAt(0);
final double log10Count1 = MathUtils.log10(count1);
final double log10Count1 = Math.log10(count1);
final double[] log10ReadLks2 = log10LikelihoodsByAlleleAndRead[gac.alleleIndexAt(1)];
final double log10Count2 = MathUtils.log10(ploidy - count1);
final double log10Count2 = Math.log10(ploidy - count1);

// note: if you are reading the multiallelic case below and have gotten paranoid about cache efficiency,
// here the log10 likelihood matrix rows for *both* alleles are in the cache at once
result[genotypeIndex] = new IndexRange(0, readCount).sum(r -> MathUtils.approximateLog10SumLog10(log10ReadLks1[r] + log10Count1, log10ReadLks2[r] + log10Count2))
- readCount * MathUtils.log10(ploidy);
- readCount * Math.log10(ploidy);
} else {
// the multiallelic case is conceptually the same as the biallelic case but done in non-log space
// We implement in a cache-friendly way by summing nA * P(read|A) over all alleles for each read, but iterating over reads as the inner loop
Arrays.fill(perReadBuffer,0, readCount, 0);
final double[][] rescaledNonLogLikelihoods = rescaledNonLogLikelihoodsAndCorrection.getLeft();
final double log10Rescaling = rescaledNonLogLikelihoodsAndCorrection.getRight();
gac.forEachAlleleIndexAndCount((a, f) -> new IndexRange(0, readCount).forEach(r -> perReadBuffer[r] += f * rescaledNonLogLikelihoods[a][r]));
result[genotypeIndex] = new IndexRange(0, readCount).sum(r -> FastMath.log10(perReadBuffer[r])) - readCount * MathUtils.log10(ploidy) + log10Rescaling;
result[genotypeIndex] = new IndexRange(0, readCount).sum(r -> Math.log10(perReadBuffer[r])) - readCount * Math.log10(ploidy) + log10Rescaling;
}
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ private AFCalculationResult calculate(final int numAlleles,
.mapToDouble(a -> a.isReference() ? refPseudocount : (a.length() == refLength ? snpPseudocount : indelPseudocount)).toArray();

double[] alleleCounts = new double[numAlleles];
final double flatLog10AlleleFrequency = -MathUtils.log10(numAlleles); // log10(1/numAlleles)
final double flatLog10AlleleFrequency = -Math.log10(numAlleles); // log10(1/numAlleles)
double[] log10AlleleFrequencies = new IndexRange(0, numAlleles).mapToDouble(n -> flatLog10AlleleFrequency);

for (double alleleCountsMaximumDifference = Double.POSITIVE_INFINITY; alleleCountsMaximumDifference > AlleleFrequencyCalculator.THRESHOLD_FOR_ALLELE_COUNT_CONVERGENCE; ) {
Expand Down Expand Up @@ -312,7 +312,7 @@ private double[] effectiveAlleleCounts(List<Genotype> genotypes, final double[]

for (final GenotypeAlleleCounts gac : GenotypeAlleleCounts.iterable(g.getPloidy(), numAlleles)) {
gac.forEachAlleleIndexAndCount((alleleIndex, count) -> log10Result[alleleIndex] =
MathUtils.log10SumLog10(log10Result[alleleIndex], log10GenotypePosteriors[gac.index()] + MathUtils.log10(count)));
MathUtils.log10SumLog10(log10Result[alleleIndex], log10GenotypePosteriors[gac.index()] + Math.log10(count)));
}
}
return MathUtils.applyToArrayInPlace(log10Result, x -> Math.pow(10.0, x));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ private GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) {
return indelPLCache[ploidy];
}

final double denominator = - MathUtils.log10(ploidy);
final double denominator = - Math.log10(ploidy);
final GenotypeLikelihoods[] result = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];

//Note: an array of zeros is the right answer for result[0].
Expand All @@ -376,8 +376,8 @@ private GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) {
final double[] PLs = new double[ploidy + 1];
PLs[0] = nInformativeReads * NO_INDEL_LIKELIHOOD;
for (int altCount = 1; altCount <= ploidy; altCount++) {
final double refLikelihoodAccum = NO_INDEL_LIKELIHOOD + MathUtils.log10(ploidy - altCount);
final double altLikelihoodAccum = INDEL_LIKELIHOOD + MathUtils.log10(altCount);
final double refLikelihoodAccum = NO_INDEL_LIKELIHOOD + Math.log10(ploidy - altCount);
final double altLikelihoodAccum = INDEL_LIKELIHOOD + Math.log10(altCount);
PLs[altCount] = nInformativeReads * (MathUtils.approximateLog10SumLog10(refLikelihoodAccum ,altLikelihoodAccum) + denominator);
}
result[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(PLs);
Expand All @@ -404,7 +404,7 @@ public ReferenceConfidenceResult calcGenotypeLikelihoodsOfRefVsAny(final int plo
final boolean readsWereRealigned) {

final int likelihoodCount = ploidy + 1;
final double log10Ploidy = MathUtils.log10(ploidy);
final double log10Ploidy = Math.log10(ploidy);

final RefVsAnyResult result = new RefVsAnyResult(likelihoodCount);
int readCount = 0;
Expand Down Expand Up @@ -491,8 +491,8 @@ private void applyPileupElementRefVsNonRefLikelihoodAndCount(final byte refBase,
for (int i = 1, j = likelihoodCount - 2; i < likelihoodCount - 1; i++, j--) {
result.genotypeLikelihoods[i] +=
MathUtils.approximateLog10SumLog10(
referenceLikelihood + MathUtils.log10(j),
nonRefLikelihood + MathUtils.log10(i));
referenceLikelihood + Math.log10(j),
nonRefLikelihood + Math.log10(i));
}
if (isAlt && hqSoftClips != null && element.isNextToSoftClip()) {
hqSoftClips.add(AlignmentUtils.countHighQualitySoftClips(element.getRead(), HQ_BASE_QUALITY_SOFTCLIP_THRESHOLD));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public KBestHaplotype(final KBestHaplotype<V, E> p, final E edge, final int tota
}

public static double computeLogPenaltyScore(int edgeMultiplicity, int totalOutgoingMultiplicity) {
return MathUtils.log10(edgeMultiplicity) - MathUtils.log10(totalOutgoingMultiplicity);
return Math.log10(edgeMultiplicity) - Math.log10(totalOutgoingMultiplicity);
}

public KBestHaplotype(final KBestHaplotype<V, E> p, final List<E> edgesToExtend, final double edgePenalty) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ public static double logLikelihoodRatio(final int nRef, final List<Byte> altQual
betaEntropy = Gamma.logGamma(alpha + beta) - Gamma.logGamma(alpha) - Gamma.logGamma(beta)
- Gamma.logGamma(alpha + beta + n) + Gamma.logGamma(alpha + nAlt) + Gamma.logGamma(beta + nRef);
} else {
betaEntropy = -FastMath.log(n + 1) - CombinatoricsUtils.binomialCoefficientLog(n, nAlt);
betaEntropy = -Math.log(n + 1) - CombinatoricsUtils.binomialCoefficientLog(n, nAlt);
}
return betaEntropy + readSum * repeatFactor;
}
Expand Down
13 changes: 0 additions & 13 deletions src/main/java/org/broadinstitute/hellbender/utils/Log10Cache.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ public final class MathUtils {

private static final double ROOT_TWO_PI = Math.sqrt(2.0 * Math.PI);

private static final Log10Cache LOG_10_CACHE = new Log10Cache();
private static final DigammaCache DIGAMMA_CACHE = new DigammaCache();

// represent overflow for computations returning a positive long
Expand Down Expand Up @@ -373,10 +372,6 @@ public static int[] vectorDiff(final int[] x, final int[] y) {
return new IndexRange(0, x.length).mapToInteger(k -> x[k] - y[k]);
}

public static double log10(int i) {
return LOG_10_CACHE.get(i);
}

public static double digamma(int i) {
return DIGAMMA_CACHE.get(i);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
import java.util.Collections;

public class NaturalLogUtils {
public static final double LOG_ONE_HALF = FastMath.log(0.5);
public static final double LOG_ONE_THIRD = FastMath.log(1.0/3);
public static final double LOG_ONE_HALF = Math.log(0.5);
private static final double LOG1MEXP_THRESHOLD = Math.log(0.5);
private static final double PHRED_TO_LOG_ERROR_PROB_FACTOR = -Math.log(10)/10;

Expand Down Expand Up @@ -146,6 +145,6 @@ public static double qualToLogProb(final byte qual) {
}

public static double logSumLog(final double a, final double b) {
return a > b ? a + FastMath.log(1 + FastMath.exp(b - a)) : b + FastMath.log(1 + FastMath.exp(a - b));
return a > b ? a + Math.log(1 + FastMath.exp(b - a)) : b + Math.log(1 + FastMath.exp(a - b));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ private enum AlleleType {
// A snp can go to 3 different bases (standard-nucs - 1), so we normalize SNP lks accordingly. Here is the
// log10 constant used for that:
private static final double LOG10_SNP_NORMALIZATION_CONSTANT =
MathUtils.log10(Nucleotide.STANDARD_BASES.size() - 1);
Math.log10(Nucleotide.STANDARD_BASES.size() - 1);

private final double[] hetValues;
private final double[] homValues;
Expand Down

0 comments on commit e325e50

Please sign in to comment.