Eliminated redundant sum calculation loop (#92)

azagniotov · Mar 4, 2025 · 6e722f1
1 parent 6178b98
commit 6e722f1
Showing 1 changed file with 9 additions and 13 deletions.
diff --git a/src/main/java/io/github/azagniotov/language/LanguageDetector.java b/src/main/java/io/github/azagniotov/language/LanguageDetector.java
@@ -168,13 +168,15 @@ private float[] detectBlock(final String input) {
 
         // Smoothing is essential in Naive Bayes to prevent zero probabilities when encountering
         // unseen n-grams.
-        float weight = alphaSmoothing / baseFreq;
+        final float weight = alphaSmoothing / baseFreq;
+        float probSum = 0.0f;
         for (int probIdx = 0; probIdx < probabilities.length; ++probIdx) {
 
           // Multiplying the existing probability of a language by the probability of
           // the n-gram appearing in that language. This aligns strongly with the
           // multiplicative nature of Naive Bayes probability calculations.
           probabilities[probIdx] *= weight + wordProbabilities[probIdx];
+          probSum += probabilities[probIdx];
         }
 
         // Probabilities are normalized and checked for convergence threshold
@@ -188,7 +190,7 @@ private float[] detectBlock(final String input) {
         if (iteration % CONVERGENCE_CHECK_FREQUENCY == 0) {
           // Normalization is often used in probability calculations to ensure that
           // the probabilities sum to 1. This is a standard practice in Naive Bayes.
-          if (normalizeProbabilitiesAndReturnMax(probabilities) > convergenceThreshold) {
+          if (normalizeProbabilitiesAndReturnMax(probSum, probabilities) > convergenceThreshold) {
             break;
           }
         }
@@ -248,21 +250,15 @@ List<String> extractNGrams(final String input) {
    * @return the maximum value found within the normalized probability array. This maximum value is
    *     used as a convergence check, to determine if the probability distribution has stabilized.
    */
-  private float normalizeProbabilitiesAndReturnMax(final float[] prob) {
-    if (prob.length == 0) {
+  private float normalizeProbabilitiesAndReturnMax(final float sump, final float[] prob) {
+    if (prob.length == 0 || sump == 0) {
       return ZERO_PROBABILITY;
     }
-    float sump = prob[0];
-    for (int i = 1; i < prob.length; i++) {
-      sump += prob[i];
-    }
+
     float maxp = ZERO_PROBABILITY;
     for (int i = 0; i < prob.length; i++) {
-      float p = prob[i] / sump;
-      if (maxp < p) {
-        maxp = p;
-      }
-      prob[i] = p;
+      prob[i] = prob[i] / sump;
+      maxp = Math.max(maxp, prob[i]);
     }
     return maxp;
   }