Commons math removal pvo review #2

Merged
31 commits
556134f
initial commit of GammaTest
pogren Jul 25, 2022
3cd120f
added testReductionBiFunction and testMeanVariance to DenseVectorTest
pogren Jul 25, 2022
af33f87
added some additional tests/assertions for cholesky and lu factorization.
pogren Jul 25, 2022
63ee5dd
fixes issue with test matrix for cholesky factorization.
pogren Jul 25, 2022
d30a604
adding some python code to help generate a unit test for
pogren Jul 27, 2022
b3ea277
added printMatrixPythonFriendly to DenseMatrix
pogren Jul 27, 2022
72b87c9
added unit testing for eigendecomposition, setColumn, and selectColumns
pogren Jul 27, 2022
6d6dc3f
cleaning up, standardizing, filling out factorization/decomp tests
pogren Jul 27, 2022
ed6aa12
added test for createIdentity and createDiagonal
pogren Jul 27, 2022
9eb0033
added simple test for DenseSparseMatrix.getColumn
pogren Jul 27, 2022
b7c0abb
ClusteringMetrics.adjustedMI produces same values as sklearn
pogren Jul 28, 2022
f8b2966
add delta to unit test for mi
pogren Jul 28, 2022
9595871
comments demonstrating generating test values in numpy/scipy/sklearn
pogren Jul 28, 2022
10818c1
added comment showing how to generate test
pogren Jul 28, 2022
742e304
initial commit of GammaTest
pogren Jul 25, 2022
afc81a6
added testReductionBiFunction and testMeanVariance to DenseVectorTest
pogren Jul 25, 2022
6a6e464
added some additional tests/assertions for cholesky and lu factorization.
pogren Jul 25, 2022
25c7146
fixes issue with test matrix for cholesky factorization.
pogren Jul 25, 2022
d1c6f9c
adding some python code to help generate a unit test for
pogren Jul 27, 2022
5f77d74
added printMatrixPythonFriendly to DenseMatrix
pogren Jul 27, 2022
4c76b61
added unit testing for eigendecomposition, setColumn, and selectColumns
pogren Jul 27, 2022
5a1bd8a
cleaning up, standardizing, filling out factorization/decomp tests
pogren Jul 27, 2022
89addb3
added test for createIdentity and createDiagonal
pogren Jul 27, 2022
c93cdd3
added simple test for DenseSparseMatrix.getColumn
pogren Jul 27, 2022
b0e472b
ClusteringMetrics.adjustedMI produces same values as sklearn
pogren Jul 28, 2022
70e51f9
add delta to unit test for mi
pogren Jul 28, 2022
d91004d
comments demonstrating generating test values in numpy/scipy/sklearn
pogren Jul 28, 2022
c228de1
added comment showing how to generate test
pogren Jul 28, 2022
6fabb6f
fixes compile errors
pogren Jul 28, 2022
4d2aa61
resolves merge conflict
pogren Jul 28, 2022
7108a8b
reverted adjustedMI to using 'min' approach for calculating denominator
pogren Jul 28, 2022
ClusteringMetrics.java
@@ -20,6 +20,7 @@
import org.tribuo.evaluation.metrics.MetricTarget;
import org.tribuo.util.infotheory.InformationTheory;

+import java.util.List;
import java.util.function.BiFunction;

/**
@@ -65,14 +66,26 @@ public ClusteringMetric forTarget(MetricTarget<ClusterID> tgt) {
     * @return The adjusted normalized mutual information.
     */
    public static double adjustedMI(ClusteringMetric.Context context) {
-       double mi = InformationTheory.mi(context.getPredictedIDs(), context.getTrueIDs());
-       double predEntropy = InformationTheory.entropy(context.getPredictedIDs());
-       double trueEntropy = InformationTheory.entropy(context.getTrueIDs());
-       double expectedMI = InformationTheory.expectedMI(context.getPredictedIDs(), context.getTrueIDs());
+       return adjustedMI(context.getPredictedIDs(), context.getTrueIDs());
+   }
+
+   public static double adjustedMI(List<Integer> predictedIDs, List<Integer> trueIDs) {
+       double mi = InformationTheory.mi(predictedIDs, trueIDs);
+       double predEntropy = InformationTheory.entropy(predictedIDs);
+       double trueEntropy = InformationTheory.entropy(trueIDs);
+       double expectedMI = InformationTheory.expectedMI(trueIDs, predictedIDs);

        double minEntropy = Math.min(predEntropy, trueEntropy);
+       double denominator = minEntropy - expectedMI;

+       if(denominator < 0) {
+           denominator = Math.min(denominator, -2.220446049250313e-16);
+       }else {
+           denominator = Math.max(denominator, 2.220446049250313e-16);
+       }
+

-       return (mi - expectedMI) / (minEntropy - expectedMI);
+       return (mi - expectedMI) / (denominator);
    }

    /**
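For reference, the new List-based overload keeps the 'min' denominator mentioned in the final commit message: the mutual information is centered by its expected value and divided by the smaller of the two entropies, with the denominator nudged away from zero by the double-precision machine epsilon (2.220446049250313e-16) so degenerate labelings cannot divide by zero. Written out as a sketch of what the code above computes, where P and T are the predicted and true cluster id lists, H(·) is InformationTheory.entropy, and E[MI] is InformationTheory.expectedMI:

$$
\mathrm{AMI}(P,T) = \frac{\mathrm{MI}(P,T) - \mathbb{E}[\mathrm{MI}]}{\tilde{d}}, \qquad d = \min\bigl(H(P), H(T)\bigr) - \mathbb{E}[\mathrm{MI}],
$$

$$
\tilde{d} = \begin{cases} \min(d, -\varepsilon) & \text{if } d < 0 \\ \max(d, \varepsilon) & \text{if } d \ge 0 \end{cases}, \qquad \varepsilon = 2.220446049250313 \times 10^{-16}.
$$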
ClusteringMetricsTest.java
@@ -0,0 +1,60 @@
package org.tribuo.clustering.evaluation;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.tribuo.clustering.evaluation.ClusteringMetrics.adjustedMI;

import java.util.Arrays;
import java.util.List;

import org.junit.jupiter.api.Test;
import org.tribuo.util.infotheory.InformationTheory;

public class ClusteringMetricsTest {

/*
* import numpy as np
* from sklearn.metrics import adjusted_mutual_info_score
* score = adjusted_mutual_info_score([0,0,1,1], [1,0,1,1])
*
* a = np.random.randint(0,2,500)
* #see printArrayAsJavaDoubles in /tribuo-math/src/test/resources/eigendecomposition-test.py
* print(printArrayAsJavaDoubles(a))
* b = np.random.randint(0,2,500)
* print(printArrayAsJavaDoubles(b))
* score = adjusted_mutual_info_score(a, b)
*/
@Test
void testAdjustedMI() throws Exception {
double logBase = InformationTheory.LOG_BASE;
InformationTheory.LOG_BASE = InformationTheory.LOG_E;
List<Integer> a = Arrays.asList(0, 3, 2, 3, 4, 4, 4, 1, 3, 3, 4, 3, 2, 3, 2, 4, 2, 2, 1, 4, 1, 2, 0, 4, 4, 4, 3, 3, 2, 2, 0, 4, 0, 1, 3, 0, 4, 0, 0, 4, 0, 0, 2, 2, 2, 2, 0, 3, 0, 2, 2, 3, 1, 0, 1, 0, 3, 4, 4, 4, 0, 1, 1, 3, 3, 1, 3, 4, 0, 3, 4, 1, 0, 3, 2, 2, 2, 1, 1, 2, 3, 2, 1, 3, 0, 4, 4, 0, 4, 0, 2, 1, 4, 0, 3, 0, 1, 1, 1, 0);
List<Integer> b = Arrays.asList(4, 2, 4, 0, 4, 4, 3, 3, 3, 2, 2, 0, 1, 3, 2, 1, 2, 0, 0, 4, 3, 3, 0, 1, 1, 1, 1, 4, 4, 4, 3, 1, 0, 0, 0, 1, 4, 1, 1, 1, 3, 3, 1, 2, 3, 0, 4, 0, 2, 3, 4, 2, 3, 2, 1, 0, 2, 4, 2, 2, 4, 1, 2, 4, 3, 1, 1, 1, 3, 0, 2, 3, 2, 0, 1, 0, 0, 4, 0, 3, 0, 0, 0, 1, 3, 2, 3, 4, 2, 4, 1, 0, 3, 3, 0, 2, 1, 0, 4, 1);
assertEquals(0.01454420034676734, adjustedMI(a,b), 1e-14);

a = Arrays.asList(1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1);
b = Arrays.asList(1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0);
assertEquals(-0.0014006748276587267, adjustedMI(a,b), 1e-14);

a = Arrays.asList(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 2, 0, 2, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2);
b = Arrays.asList(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 2, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0);
assertEquals(0.31766625364399165, adjustedMI(a,b), 1e-14);

assertEquals(1.0, adjustedMI(Arrays.asList(0,0,1,1), Arrays.asList(0,0,1,1)));
assertEquals(1.0, adjustedMI(Arrays.asList(0,0,1,1), Arrays.asList(1,1,0,0)));
assertEquals(0.0, adjustedMI(Arrays.asList(0,0,0,0), Arrays.asList(1,2,3,4)));
assertEquals(0.0, adjustedMI(Arrays.asList(0,0,1,1), Arrays.asList(1,1,1,1)));
assertEquals(0.0834628172282441, adjustedMI(Arrays.asList(0,0,0,1,0,1,1,1), Arrays.asList(0,0,0,0,1,1,1,1)), 1e-15);
assertEquals(0, adjustedMI(Arrays.asList(1,0,1,1), Arrays.asList(0,0,1,1)), 1e-14);

InformationTheory.LOG_BASE = logBase;
}
}

//used to create third example
//Random rng = new Random();
//a = new ArrayList<>();
//for(int i=0; i<100; i++) {
// int v = rng.nextDouble()*i < 20 ? 0 : i < 50 ? 1 : 2;
// a.add(v);
//}
//System.out.println(a);
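For anyone who wants to call the new entry point outside the evaluator, here is a minimal usage sketch. It is not part of this PR: the demo class name and the label lists are made up, and it simply assumes the List overload stays public as shown in the diff. It follows the unit test's convention of switching InformationTheory.LOG_BASE to natural log so values are on the same scale as sklearn, then restoring the previous base.

import java.util.Arrays;
import java.util.List;

import org.tribuo.clustering.evaluation.ClusteringMetrics;
import org.tribuo.util.infotheory.InformationTheory;

public class AdjustedMIDemo {
    public static void main(String[] args) {
        // Follow the unit test: use natural log so the result is comparable
        // with sklearn.metrics.adjusted_mutual_info_score, then restore it.
        double previousBase = InformationTheory.LOG_BASE;
        InformationTheory.LOG_BASE = InformationTheory.LOG_E;
        try {
            // Hypothetical example clusterings; any two integer id lists
            // of the same length work here.
            List<Integer> predicted = Arrays.asList(0, 0, 1, 1, 2, 2);
            List<Integer> truth     = Arrays.asList(0, 0, 0, 1, 2, 2);

            // New overload from this PR: no ClusteringMetric.Context needed.
            double ami = ClusteringMetrics.adjustedMI(predicted, truth);
            System.out.println("adjusted MI = " + ami);
        } finally {
            InformationTheory.LOG_BASE = previousBase;
        }
    }
}

The try/finally restore matters because LOG_BASE is a static field shared by every other InformationTheory call in the process.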