From ef65f43a30800f607cd12bcc239a38df93ae8a11 Mon Sep 17 00:00:00 2001 From: korffmo Date: Mon, 30 Sep 2024 20:52:55 +0200 Subject: [PATCH] Updated Flexophore similarity calculations for synthon similarity. --- .../HistogramMatchCalculator.java | 24 ++++++++----------- ...ctiveBlurFlexophoreHardMatchUncovered.java | 18 ++------------ .../ConstantsFlexophoreGenerator.java | 1 + .../graph/complete/SolutionCompleteGraph.java | 2 +- 4 files changed, 14 insertions(+), 31 deletions(-) diff --git a/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/HistogramMatchCalculator.java b/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/HistogramMatchCalculator.java index 8f072941..da3dfb82 100644 --- a/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/HistogramMatchCalculator.java +++ b/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/HistogramMatchCalculator.java @@ -40,7 +40,6 @@ * * HistogramMatchCalculator * @author Modest von Korff - * @version 1.0 * Oct 2, 2012 MvK: Start implementation * May 15 2013 MvK: Heavy bug detected. Wrong similarity results. reset() added. * Mar 01 2016 MvK sliding filter added. @@ -48,13 +47,20 @@ */ public class HistogramMatchCalculator { + /** + * + * @param dh1 + * @param indexDistHist1At1 + * @param indexDistHist1At2 + * @param dh2 + * @param indexDistHist2At1 + * @param indexDistHist2At2 + * @return integral of overlap + */ public static double getSimilarity(DistHist dh1, int indexDistHist1At1, int indexDistHist1At2, DistHist dh2, int indexDistHist2At1, int indexDistHist2At2){ - int indexPostStartDistHist1 = dh1.getIndexPosStartForDistHist(indexDistHist1At1, indexDistHist1At2); int indexPostStartDistHist2 = dh2.getIndexPosStartForDistHist(indexDistHist2At1, indexDistHist2At2); - int n = ConstantsFlexophoreGenerator.BINS_HISTOGRAM; - double sumMin = 0; double sumMax = 0; @@ -64,24 +70,17 @@ public static double getSimilarity(DistHist dh1, int indexDistHist1At1, int inde sumMin += Math.min(v1, v2); sumMax += Math.max(v1, v2); } - double score = sumMin / sumMax; - return score; - } public static double getPercentageOverlap(DistHist dh1, int indexDistHist1At1, int indexDistHist1At2, DistHist dh2, int indexDistHist2At1, int indexDistHist2At2){ - int indexPostStartDistHist1 = dh1.getIndexPosStartForDistHist(indexDistHist1At1, indexDistHist1At2); int indexPostStartDistHist2 = dh2.getIndexPosStartForDistHist(indexDistHist2At1, indexDistHist2At2); - int n = ConstantsFlexophoreGenerator.BINS_HISTOGRAM; - double sumMin = 0; double sum1 = 0; double sum2 = 0; - for (int i = 0; i < n; i++) { int v1 = dh1.getValueAtAbsolutePosition(indexPostStartDistHist1+i); int v2 = dh2.getValueAtAbsolutePosition(indexPostStartDistHist2+i); @@ -90,11 +89,8 @@ public static double getPercentageOverlap(DistHist dh1, int indexDistHist1At1, i sum1 += v1; sum2 += v2; } - double score = sumMin / Math.max(sum1, sum2);; - return score; - } diff --git a/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/ObjectiveBlurFlexophoreHardMatchUncovered.java b/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/ObjectiveBlurFlexophoreHardMatchUncovered.java index 593a957b..e996036a 100644 --- a/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/ObjectiveBlurFlexophoreHardMatchUncovered.java +++ b/src/main/java/com/actelion/research/chem/descriptor/flexophore/completegraphmatcher/ObjectiveBlurFlexophoreHardMatchUncovered.java @@ -47,6 +47,7 @@ public class ObjectiveBlurFlexophoreHardMatchUncovered implements IObjectiveComp // Changed to 0.9 21.08.2024 MvK final static double THRESH_NODE_SIMILARITY_START = 0.9; + final static double OPTIMISTIC_HISTOGRAM_THRESH = 0.0; private static final float INIT_VAL = -1; @@ -1128,7 +1129,7 @@ private double getScorePairwiseMapping(int indexNode1Query, int indexNode2Query, double simHists = getSimilarityHistogram(indexNode1Query, indexNode2Query, indexNode1Base, indexNode2Base); if(optimisticHistogramSimilarity) { - if (simHists > 0) { + if (simHists >= OPTIMISTIC_HISTOGRAM_THRESH) { simHists = 1.0; } } @@ -1136,42 +1137,27 @@ private double getScorePairwiseMapping(int indexNode1Query, int indexNode2Query, if(verbose){ System.out.println("simHists " + Formatter.format2(simHists)); } - - // score = simNodePair1 * simNodePair1 * simNodePair2 * simNodePair2 * simHists * simHists * simHists; - score = simNodePair1 * simNodePair1 * simNodePair2 * simNodePair2 * simHists * simHists; - - return score; } public double getSimilarityNodes(int indexNodeQuery, int indexNodeBase) { - if(arrSimilarityNodes[indexNodeQuery][indexNodeBase] < 0 || verbose){ - float similarity = (float)nodeSimilarity.getSimilarity(mdhvQueryBlurredHist.getNode(indexNodeQuery), mdhvBaseBlurredHist.getNode(indexNodeBase)); - arrSimilarityNodes[indexNodeQuery][indexNodeBase]=similarity; } - return arrSimilarityNodes[indexNodeQuery][indexNodeBase]; } public float getSimilarityHistogram(int indexNode1Query, int indexNode2Query, int indexNode1Base, int indexNode2Base) { int indexHistogramQuery = DistHist.getIndex(indexNode1Query, indexNode2Query, nodesQuery); - int indexHistogramBase = DistHist.getIndex(indexNode1Base, indexNode2Base, nodesBase); - if(arrSimilarityHistograms[indexHistogramQuery][indexHistogramBase] < 0){ - float similarityHistogram = 0; - similarityHistogram = (float)HistogramMatchCalculator.getSimilarity( mdhvQueryBlurredHist, indexNode1Query, indexNode2Query, mdhvBaseBlurredHist, indexNode1Base, indexNode2Base); - - arrSimilarityHistograms[indexHistogramQuery][indexHistogramBase]=similarityHistogram; } diff --git a/src/main/java/com/actelion/research/chem/descriptor/flexophore/generator/ConstantsFlexophoreGenerator.java b/src/main/java/com/actelion/research/chem/descriptor/flexophore/generator/ConstantsFlexophoreGenerator.java index cc4c13aa..6cb77b7f 100644 --- a/src/main/java/com/actelion/research/chem/descriptor/flexophore/generator/ConstantsFlexophoreGenerator.java +++ b/src/main/java/com/actelion/research/chem/descriptor/flexophore/generator/ConstantsFlexophoreGenerator.java @@ -30,6 +30,7 @@ public class ConstantsFlexophoreGenerator { */ // public static final double [] FILTER = FILTER05; public static final double [] FILTER07_ = {0.125, 0.125, 0.125,0.25,0.125, 0.125, 0.125}; + public static final double [] FILTER05_ = {0.125, 0.25,0.25,0.25, 0.125}; public static final double [] FILTER = FILTER07_; diff --git a/src/main/java/com/actelion/research/util/graph/complete/SolutionCompleteGraph.java b/src/main/java/com/actelion/research/util/graph/complete/SolutionCompleteGraph.java index ce96e91f..3bfd08b9 100644 --- a/src/main/java/com/actelion/research/util/graph/complete/SolutionCompleteGraph.java +++ b/src/main/java/com/actelion/research/util/graph/complete/SolutionCompleteGraph.java @@ -62,7 +62,7 @@ public class SolutionCompleteGraph extends AMemorizedObject implements Comparabl private byte maxIndexNodeQuery; /** - * The index is the index of the node in the query molecule. + * The index is the index of the node in the query molecule. Not matched query nodes contain a -1. * The value at 'index' is the index of the node in the base molecule. * Contains the same information as heapIndexBase and heapIndexQuery. Used for fast lookup. */