diff --git a/build.properties b/build.properties index 1884fb1..5a2011c 100644 --- a/build.properties +++ b/build.properties @@ -1,3 +1,3 @@ -extension.version=2 -extension.revision=2 -extension.update=000 +extension.version=2 +extension.revision=3 +extension.update=000 diff --git a/resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml b/resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml index badf66d..865f16a 100644 --- a/resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml +++ b/resources/com/rapidminer/resources/OperatorsAnomalyDetection.xml @@ -74,12 +74,17 @@ - Color Coded Join - de.dfki.madm.anomalydetection.operator.statistical_based.OutlierColorJoin - - + Color Coded Join + de.dfki.madm.anomalydetection.operator.statistical_based.OutlierColorJoin + + + + Robust Principal Component Analysis Anomaly Score (rPCA) + de.dfki.madm.anomalydetection.operator.statistical_based.RobustPCAOperator + + - + One-Class LIBSVM Anomaly Score de.dfki.madm.anomalydetection.operator.kernel_based.AnomalyDetectionLibSVMOperator diff --git a/resources/com/rapidminer/resources/i18n/OperatorsDocAnomalyDetection.xml b/resources/com/rapidminer/resources/i18n/OperatorsDocAnomalyDetection.xml index 349aa9d..458dc62 100644 --- a/resources/com/rapidminer/resources/i18n/OperatorsDocAnomalyDetection.xml +++ b/resources/com/rapidminer/resources/i18n/OperatorsDocAnomalyDetection.xml @@ -309,7 +309,7 @@

- + Color Coded Join Creates color coded Outlier output

@@ -334,7 +334,25 @@

- + + Robust Principal Component Analysis Anomaly Score (rPCA) + Computes an anomaly score based on a robust PCA estimation +

Computes a robust PCA-based anomaly score. For robustness, the original data set is first + trimmed based on the Mahalanobis distance. Then, + PCA is computed and a score is determined based on the top and/or lowest principal components (PCs). + This operator follows the papers "A Novel Anomaly Detection Scheme Based on + Principal Component Classifier" by Shyu et al. (2003) and "Robust Methods for + Unsupervised PCA-based Anomaly Detection" by Kwitt et al. (2006). In contrast to + the original publications, this operator computes a normalized score instead of + classifying instances as normal or anomalous. +

+ Please note that this method can only handle normal data that forms a single large cluster and will + probably fail if the normal data consists of multiple clusters (non-linear dependencies), as + well as on local anomaly detection tasks. +

+
+
+ One-Class LIBSVM Anomaly Score Computes the outlier score using one-class SVMs

diff --git a/src/de/dfki/madm/anomalydetection/operator/statistical_based/RobustPCAOperator.java b/src/de/dfki/madm/anomalydetection/operator/statistical_based/RobustPCAOperator.java new file mode 100644 index 0000000..0ae51a7 --- /dev/null +++ b/src/de/dfki/madm/anomalydetection/operator/statistical_based/RobustPCAOperator.java @@ -0,0 +1,383 @@ +/* + * RapidMiner Anomaly Detection Extension + * + * Copyright (C) 2009-2014 by Markus Goldstein or its licensors, as applicable. + * + * This is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * You should have received a copy of the GNU Affero General Public License + * along with this software. If not, see l = new LinkedList(); + double eIdx = 0; + for (Example example : res) { + double md = 0.0; + int aNr = 0; + for ( Attribute attr : example.getAttributes() ) { + double pcscore = example.getValue(attr); + md += (pcscore*pcscore)/model.getEigenvalue(aNr); + aNr++; + } + double[] x = {md, eIdx}; + l.add(x); + eIdx++; + } + Collections.sort(l,new Comparator() { + public int compare(double[] first, double[] second) { + return Double.compare(second[0], first[0]); + } + }); + // Out of the list, create array with outlier-indexes and array (mapping) with good instances. 
+ Iterator iter = l.iterator(); + int[] olMapping = new int[olInst]; + for (int i=0; i < olInst; i++) { + olMapping[i] = (int) ((double[])iter.next())[1]; + } + Arrays.sort(olMapping); + int[] mapping = new int[exampleSet.size()-olInst]; + int olc = 0; + int ctr = 0; + for (int i = 0; i < exampleSet.size(); i++) { + if (olc == olInst) { // Add last elements after last outlier + mapping[ctr++] = i; + continue; + } + if (olMapping[olc] != i) { + mapping[ctr++] = i; + } + else { + olc++; + } + } + ExampleSet robustExampleSet = new MappedExampleSet(exampleSet, mapping); // creates a new example set without the top outliers. + + // --- + // Second PCA (robust) + covarianceMatrix = CovarianceMatrix.getCovarianceMatrix(robustExampleSet); + eigenvalueDecomposition = covarianceMatrix.eig(); + + // create and deliver results + eigenvalues = eigenvalueDecomposition.getRealEigenvalues(); + eigenvectorMatrix = eigenvalueDecomposition.getV(); + eigenvectors = eigenvectorMatrix.getArray(); + + // Apply on original set + model = new PCAModel(exampleSet, eigenvalues, eigenvectors); + + // Perform transformation + res = model.apply((ExampleSet) exampleSet.clone()); + + // Sort eigenvalues + Arrays.sort(eigenvalues); + ArrayUtils.reverse(eigenvalues); + + // if necessary reduce nbr of dimensions ... 
+ int reductionType = getParameterAsInt(PARAMETER_REDUCTION_TYPE); + List pcList = new ArrayList(); + if (reductionType == PCS_ALL) { + for (int i=0; i getParameterTypes() { + List list = super.getParameterTypes(); + + list.add(new ParameterTypeDouble(PARAMETER_OUTLIER_PROBABILITY, PARAMETER_OUTLIER_PROBABILITY_DESCRIPTION, 0, 1.0, 0.975, false)); + + ParameterType type = new ParameterTypeCategory(PARAMETER_REDUCTION_TYPE, PARAMETER_REDUCTION_TYPE_DESCRIPTION, PCS_METHODS, PCS_ALL); + type.setExpert(false); + list.add(type); + + type = new ParameterTypeCategory(PARAMETER_TOP_METHODS, PARAMETER_TOP_METHODS_DESCRIPTION, PCS_TOP_METHODS, PCS_TOP_VAR); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, false, PCS_TOP, PCS_BOTH)); + list.add(type); + + type = new ParameterTypeDouble(PARAMETER_VARIANCE_THRESHOLD, PARAMETER_VARIANCE_THRESHOLD_DESCRIPTION, Double.MIN_VALUE, 1, 0.50); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, true, PCS_TOP, PCS_BOTH)); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_TOP_METHODS, PCS_TOP_METHODS, true, PCS_TOP_VAR)); + list.add(type); + + type = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS_TOP, PARAMETER_NUMBER_OF_COMPONENTS_TOP_DESCRIPTION, 1, Integer.MAX_VALUE, 1); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, true, PCS_TOP, PCS_BOTH)); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_TOP_METHODS, PCS_TOP_METHODS, true, PCS_TOP_FIX)); + list.add(type); + + type = new ParameterTypeCategory(PARAMETER_LOW_METHODS, PARAMETER_LOW_METHODS_DESCRIPTION, PCS_LOW_METHODS, PCS_LOW_VAL); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, false, PCS_LOWER, PCS_BOTH)); + 
list.add(type); + + type = new ParameterTypeDouble(PARAMETER_VALUE_THRESHOLD, PARAMETER_VALUE_THRESHOLD_DESCRIPTION, 0, Double.MAX_VALUE, 0.20); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, true, PCS_LOWER, PCS_BOTH)); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_LOW_METHODS, PCS_LOW_METHODS, true, PCS_LOW_VAL)); + list.add(type); + + type = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS_LOW, PARAMETER_NUMBER_OF_COMPONENTS_LOW_DESCRIPTION, 1, Integer.MAX_VALUE, 1); + type.setExpert(false); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, PCS_METHODS, true, PCS_LOWER, PCS_BOTH)); + type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_LOW_METHODS, PCS_LOW_METHODS, true, PCS_LOW_FIX)); + list.add(type); + + return list; + } +}